Diffstat (limited to 'third_party/aom/av1/encoder')
53 files changed, 6143 insertions, 7582 deletions
diff --git a/third_party/aom/av1/encoder/aq_cyclicrefresh.c b/third_party/aom/av1/encoder/aq_cyclicrefresh.c index e41c608b6..b2b410617 100644 --- a/third_party/aom/av1/encoder/aq_cyclicrefresh.c +++ b/third_party/aom/av1/encoder/aq_cyclicrefresh.c @@ -353,8 +353,8 @@ void av1_cyclic_refresh_check_golden_update(AV1_COMP *const cpi) { // frame because of the camera movement, set this frame as the golden frame. // Use 70% and 5% as the thresholds for golden frame refreshing. // Also, force this frame as a golden update frame if this frame will change - // the resolution (resize_pending != 0). - if (cpi->resize_pending != 0 || + // the resolution (av1_resize_pending != 0). + if (av1_resize_pending(cpi) || (cnt1 * 10 > (70 * rows * cols) && cnt2 * 20 < cnt1)) { av1_cyclic_refresh_set_golden_update(cpi); rc->frames_till_gf_update_due = rc->baseline_gf_interval; diff --git a/third_party/aom/av1/encoder/av1_quantize.c b/third_party/aom/av1/encoder/av1_quantize.c index 6cffac264..63727df1f 100644 --- a/third_party/aom/av1/encoder/av1_quantize.c +++ b/third_party/aom/av1/encoder/av1_quantize.c @@ -1594,50 +1594,48 @@ static int get_qzbin_factor(int q, aom_bit_depth_t bit_depth) { #endif } -void av1_init_quantizer(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - QUANTS *const quants = &cpi->quants; +void av1_build_quantizer(aom_bit_depth_t bit_depth, int y_dc_delta_q, + int uv_dc_delta_q, int uv_ac_delta_q, + QUANTS *const quants, Dequants *const deq) { int i, q, quant; -#if CONFIG_NEW_QUANT - int dq; -#endif for (q = 0; q < QINDEX_RANGE; q++) { - const int qzbin_factor = get_qzbin_factor(q, cm->bit_depth); + const int qzbin_factor = get_qzbin_factor(q, bit_depth); const int qrounding_factor = q == 0 ? 64 : 48; for (i = 0; i < 2; ++i) { int qrounding_factor_fp = 64; // y - quant = i == 0 ? av1_dc_quant(q, cm->y_dc_delta_q, cm->bit_depth) - : av1_ac_quant(q, 0, cm->bit_depth); + quant = i == 0 ? av1_dc_quant(q, y_dc_delta_q, bit_depth) + : av1_ac_quant(q, 0, bit_depth); invert_quant(&quants->y_quant[q][i], &quants->y_quant_shift[q][i], quant); quants->y_quant_fp[q][i] = (1 << 16) / quant; quants->y_round_fp[q][i] = (qrounding_factor_fp * quant) >> 7; quants->y_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7); quants->y_round[q][i] = (qrounding_factor * quant) >> 7; - cpi->y_dequant[q][i] = quant; + deq->y_dequant[q][i] = quant; // uv - quant = i == 0 ? av1_dc_quant(q, cm->uv_dc_delta_q, cm->bit_depth) - : av1_ac_quant(q, cm->uv_ac_delta_q, cm->bit_depth); + quant = i == 0 ? 
av1_dc_quant(q, uv_dc_delta_q, bit_depth) + : av1_ac_quant(q, uv_ac_delta_q, bit_depth); invert_quant(&quants->uv_quant[q][i], &quants->uv_quant_shift[q][i], quant); quants->uv_quant_fp[q][i] = (1 << 16) / quant; quants->uv_round_fp[q][i] = (qrounding_factor_fp * quant) >> 7; quants->uv_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7); quants->uv_round[q][i] = (qrounding_factor * quant) >> 7; - cpi->uv_dequant[q][i] = quant; + deq->uv_dequant[q][i] = quant; } #if CONFIG_NEW_QUANT + int dq; for (dq = 0; dq < QUANT_PROFILES; dq++) { for (i = 0; i < COEF_BANDS; i++) { - const int y_quant = cpi->y_dequant[q][i != 0]; - const int uvquant = cpi->uv_dequant[q][i != 0]; - av1_get_dequant_val_nuq(y_quant, i, cpi->y_dequant_val_nuq[dq][q][i], + const int y_quant = deq->y_dequant[q][i != 0]; + const int uvquant = deq->uv_dequant[q][i != 0]; + av1_get_dequant_val_nuq(y_quant, i, deq->y_dequant_val_nuq[dq][q][i], quants->y_cuml_bins_nuq[dq][q][i], dq); - av1_get_dequant_val_nuq(uvquant, i, cpi->uv_dequant_val_nuq[dq][q][i], + av1_get_dequant_val_nuq(uvquant, i, deq->uv_dequant_val_nuq[dq][q][i], quants->uv_cuml_bins_nuq[dq][q][i], dq); } } @@ -1650,7 +1648,7 @@ void av1_init_quantizer(AV1_COMP *cpi) { quants->y_quant_shift[q][i] = quants->y_quant_shift[q][1]; quants->y_zbin[q][i] = quants->y_zbin[q][1]; quants->y_round[q][i] = quants->y_round[q][1]; - cpi->y_dequant[q][i] = cpi->y_dequant[q][1]; + deq->y_dequant[q][i] = deq->y_dequant[q][1]; quants->uv_quant[q][i] = quants->uv_quant[q][1]; quants->uv_quant_fp[q][i] = quants->uv_quant_fp[q][1]; @@ -1658,11 +1656,19 @@ void av1_init_quantizer(AV1_COMP *cpi) { quants->uv_quant_shift[q][i] = quants->uv_quant_shift[q][1]; quants->uv_zbin[q][i] = quants->uv_zbin[q][1]; quants->uv_round[q][i] = quants->uv_round[q][1]; - cpi->uv_dequant[q][i] = cpi->uv_dequant[q][1]; + deq->uv_dequant[q][i] = deq->uv_dequant[q][1]; } } } +void av1_init_quantizer(AV1_COMP *cpi) { + AV1_COMMON *const cm = &cpi->common; + QUANTS *const quants = &cpi->quants; + Dequants *const dequants = &cpi->dequants; + av1_build_quantizer(cm->bit_depth, cm->y_dc_delta_q, cm->uv_dc_delta_q, + cm->uv_ac_delta_q, quants, dequants); +} + void av1_init_plane_quantizers(const AV1_COMP *cpi, MACROBLOCK *x, int segment_id) { const AV1_COMMON *const cm = &cpi->common; @@ -1712,11 +1718,12 @@ void av1_init_plane_quantizers(const AV1_COMP *cpi, MACROBLOCK *x, memcpy(&xd->plane[0].seg_iqmatrix[segment_id], cm->giqmatrix[qmlevel][0], sizeof(cm->giqmatrix[qmlevel][0])); #endif - xd->plane[0].dequant = cpi->y_dequant[qindex]; + xd->plane[0].dequant = cpi->dequants.y_dequant[qindex]; #if CONFIG_NEW_QUANT for (dq = 0; dq < QUANT_PROFILES; dq++) { x->plane[0].cuml_bins_nuq[dq] = quants->y_cuml_bins_nuq[dq][qindex]; - xd->plane[0].dequant_val_nuq[dq] = cpi->y_dequant_val_nuq[dq][qindex]; + xd->plane[0].dequant_val_nuq[dq] = + cpi->dequants.y_dequant_val_nuq[dq][qindex]; } #endif // CONFIG_NEW_QUANT @@ -1734,11 +1741,12 @@ void av1_init_plane_quantizers(const AV1_COMP *cpi, MACROBLOCK *x, memcpy(&xd->plane[i].seg_iqmatrix[segment_id], cm->giqmatrix[qmlevel][1], sizeof(cm->giqmatrix[qmlevel][1])); #endif - xd->plane[i].dequant = cpi->uv_dequant[qindex]; + xd->plane[i].dequant = cpi->dequants.uv_dequant[qindex]; #if CONFIG_NEW_QUANT for (dq = 0; dq < QUANT_PROFILES; dq++) { x->plane[i].cuml_bins_nuq[dq] = quants->uv_cuml_bins_nuq[dq][qindex]; - xd->plane[i].dequant_val_nuq[dq] = cpi->uv_dequant_val_nuq[dq][qindex]; + xd->plane[i].dequant_val_nuq[dq] = + cpi->dequants.uv_dequant_val_nuq[dq][qindex]; } #endif 
// CONFIG_NEW_QUANT } diff --git a/third_party/aom/av1/encoder/av1_quantize.h b/third_party/aom/av1/encoder/av1_quantize.h index c87b6b7dc..4bc9cccc2 100644 --- a/third_party/aom/av1/encoder/av1_quantize.h +++ b/third_party/aom/av1/encoder/av1_quantize.h @@ -69,6 +69,17 @@ typedef struct { DECLARE_ALIGNED(16, int16_t, uv_round[QINDEX_RANGE][8]); } QUANTS; +typedef struct { + DECLARE_ALIGNED(16, int16_t, y_dequant[QINDEX_RANGE][8]); // 8: SIMD width + DECLARE_ALIGNED(16, int16_t, uv_dequant[QINDEX_RANGE][8]); // 8: SIMD width +#if CONFIG_NEW_QUANT + DECLARE_ALIGNED(16, dequant_val_type_nuq, + y_dequant_val_nuq[QUANT_PROFILES][QINDEX_RANGE][COEF_BANDS]); + DECLARE_ALIGNED(16, dequant_val_type_nuq, + uv_dequant_val_nuq[QUANT_PROFILES][QINDEX_RANGE][COEF_BANDS]); +#endif // CONFIG_NEW_QUANT +} Dequants; + struct AV1_COMP; struct AV1Common; @@ -77,6 +88,10 @@ void av1_frame_init_quantizer(struct AV1_COMP *cpi); void av1_init_plane_quantizers(const struct AV1_COMP *cpi, MACROBLOCK *x, int segment_id); +void av1_build_quantizer(aom_bit_depth_t bit_depth, int y_dc_delta_q, + int uv_dc_delta_q, int uv_ac_delta_q, + QUANTS *const quants, Dequants *const deq); + void av1_init_quantizer(struct AV1_COMP *cpi); void av1_set_quantizer(struct AV1Common *cm, int q); diff --git a/third_party/aom/av1/encoder/bitstream.c b/third_party/aom/av1/encoder/bitstream.c index 7cc6179ea..f8378b14d 100644 --- a/third_party/aom/av1/encoder/bitstream.c +++ b/third_party/aom/av1/encoder/bitstream.c @@ -63,21 +63,12 @@ static struct av1_token intra_mode_encodings[INTRA_MODES]; static struct av1_token switchable_interp_encodings[SWITCHABLE_FILTERS]; -#if CONFIG_EXT_PARTITION_TYPES && !CONFIG_EC_MULTISYMBOL -static const struct av1_token ext_partition_encodings[EXT_PARTITION_TYPES] = { - { 0, 1 }, { 4, 3 }, { 12, 4 }, { 7, 3 }, - { 10, 4 }, { 11, 4 }, { 26, 5 }, { 27, 5 } -}; -#endif static struct av1_token partition_encodings[PARTITION_TYPES]; -#if !CONFIG_REF_MV -static struct av1_token inter_mode_encodings[INTER_MODES]; -#endif #if CONFIG_EXT_INTER static const struct av1_token inter_compound_mode_encodings[INTER_COMPOUND_MODES] = { - { 2, 2 }, { 50, 6 }, { 51, 6 }, { 24, 5 }, { 52, 6 }, - { 53, 6 }, { 54, 6 }, { 55, 6 }, { 0, 1 }, { 7, 3 } + { 2, 2 }, { 12, 4 }, { 52, 6 }, { 53, 6 }, + { 54, 6 }, { 55, 6 }, { 0, 1 }, { 7, 3 } }; #endif // CONFIG_EXT_INTER #if CONFIG_PALETTE @@ -85,16 +76,6 @@ static struct av1_token palette_size_encodings[PALETTE_SIZES]; static struct av1_token palette_color_index_encodings[PALETTE_SIZES] [PALETTE_COLORS]; #endif // CONFIG_PALETTE -#if !CONFIG_EC_MULTISYMBOL -static const struct av1_token tx_size_encodings[MAX_TX_DEPTH][TX_SIZES] = { - { { 0, 1 }, { 1, 1 } }, // Max tx_size is 8X8 - { { 0, 1 }, { 2, 2 }, { 3, 2 } }, // Max tx_size is 16X16 - { { 0, 1 }, { 2, 2 }, { 6, 3 }, { 7, 3 } }, // Max tx_size is 32X32 -#if CONFIG_TX64X64 - { { 0, 1 }, { 2, 2 }, { 6, 3 }, { 14, 4 }, { 15, 4 } }, // Max tx_size 64X64 -#endif // CONFIG_TX64X64 -}; -#endif #if CONFIG_EXT_INTRA || CONFIG_FILTER_INTRA || CONFIG_PALETTE static INLINE void write_uniform(aom_writer *w, int n, int v) { @@ -125,7 +106,9 @@ static struct av1_token intra_filter_encodings[INTRA_FILTERS]; #endif // CONFIG_INTRA_INTERP #endif // CONFIG_EXT_INTRA #if CONFIG_EXT_INTER +#if CONFIG_INTERINTRA static struct av1_token interintra_mode_encodings[INTERINTRA_MODES]; +#endif #if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE static struct av1_token compound_type_encodings[COMPOUND_TYPES]; #endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE @@ -162,9 
+145,6 @@ void av1_encode_token_init(void) { av1_tokens_from_tree(intra_mode_encodings, av1_intra_mode_tree); av1_tokens_from_tree(switchable_interp_encodings, av1_switchable_interp_tree); av1_tokens_from_tree(partition_encodings, av1_partition_tree); -#if !CONFIG_REF_MV - av1_tokens_from_tree(inter_mode_encodings, av1_inter_mode_tree); -#endif #if CONFIG_PALETTE av1_tokens_from_tree(palette_size_encodings, av1_palette_size_tree); @@ -178,7 +158,9 @@ void av1_encode_token_init(void) { av1_tokens_from_tree(intra_filter_encodings, av1_intra_filter_tree); #endif // CONFIG_EXT_INTRA && CONFIG_INTRA_INTERP #if CONFIG_EXT_INTER +#if CONFIG_INTERINTRA av1_tokens_from_tree(interintra_mode_encodings, av1_interintra_mode_tree); +#endif // CONFIG_INTERINTRA #if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE av1_tokens_from_tree(compound_type_encodings, av1_compound_type_tree); #endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE @@ -195,7 +177,6 @@ void av1_encode_token_init(void) { av1_switchable_restore_tree); #endif // CONFIG_LOOP_RESTORATION -#if CONFIG_EC_MULTISYMBOL /* This hack is necessary when CONFIG_DUAL_FILTER is enabled because the five SWITCHABLE_FILTERS are not consecutive, e.g., 0, 1, 2, 3, 4, when doing an in-order traversal of the av1_switchable_interp_tree structure. */ @@ -218,7 +199,6 @@ void av1_encode_token_init(void) { av1_intra_mode_tree); av1_indices_from_tree(av1_inter_mode_ind, av1_inter_mode_inv, av1_inter_mode_tree); -#endif } static void write_intra_mode_kf(const AV1_COMMON *cm, FRAME_CONTEXT *frame_ctx, @@ -228,37 +208,28 @@ static void write_intra_mode_kf(const AV1_COMMON *cm, FRAME_CONTEXT *frame_ctx, #if CONFIG_INTRABC assert(!is_intrabc_block(&mi->mbmi)); #endif // CONFIG_INTRABC -#if CONFIG_EC_MULTISYMBOL aom_write_symbol(w, av1_intra_mode_ind[mode], get_y_mode_cdf(frame_ctx, mi, above_mi, left_mi, block), INTRA_MODES); (void)cm; -#else - av1_write_token(w, av1_intra_mode_tree, - get_y_mode_probs(cm, mi, above_mi, left_mi, block), - &intra_mode_encodings[mode]); - (void)frame_ctx; -#endif } -#if CONFIG_EXT_INTER +#if CONFIG_EXT_INTER && CONFIG_INTERINTRA static void write_interintra_mode(aom_writer *w, INTERINTRA_MODE mode, const aom_prob *probs) { av1_write_token(w, av1_interintra_mode_tree, probs, &interintra_mode_encodings[mode]); } -#endif // CONFIG_EXT_INTER +#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA static void write_inter_mode(aom_writer *w, PREDICTION_MODE mode, FRAME_CONTEXT *ec_ctx, const int16_t mode_ctx) { -#if CONFIG_REF_MV const int16_t newmv_ctx = mode_ctx & NEWMV_CTX_MASK; const aom_prob newmv_prob = ec_ctx->newmv_prob[newmv_ctx]; -#define IS_NEWMV_MODE(mode) ((mode) == NEWMV) - aom_write(w, !IS_NEWMV_MODE(mode), newmv_prob); + aom_write(w, mode != NEWMV, newmv_prob); - if (!IS_NEWMV_MODE(mode)) { + if (mode != NEWMV) { const int16_t zeromv_ctx = (mode_ctx >> ZEROMV_OFFSET) & ZEROMV_CTX_MASK; const aom_prob zeromv_prob = ec_ctx->zeromv_prob[zeromv_ctx]; @@ -281,25 +252,8 @@ static void write_inter_mode(aom_writer *w, PREDICTION_MODE mode, aom_write(w, mode != NEARESTMV, refmv_prob); } } - -#undef IS_NEWMV_MODE - -#else // !CONFIG_REF_MV - assert(is_inter_mode(mode)); -#if CONFIG_EC_MULTISYMBOL - aom_write_symbol(w, av1_inter_mode_ind[INTER_OFFSET(mode)], - ec_ctx->inter_mode_cdf[mode_ctx], INTER_MODES); -#else - { - const aom_prob *const inter_probs = ec_ctx->inter_mode_probs[mode_ctx]; - av1_write_token(w, av1_inter_mode_tree, inter_probs, - &inter_mode_encodings[INTER_OFFSET(mode)]); - } -#endif -#endif } -#if CONFIG_REF_MV static void 
write_drl_idx(const AV1_COMMON *cm, const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext, aom_writer *w) { uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame); @@ -341,7 +295,6 @@ static void write_drl_idx(const AV1_COMMON *cm, const MB_MODE_INFO *mbmi, return; } } -#endif #if CONFIG_EXT_INTER static void write_inter_compound_mode(AV1_COMMON *cm, aom_writer *w, @@ -409,22 +362,22 @@ static void write_tx_size_vartx(const AV1_COMMON *cm, const MACROBLOCKD *xd, const int max_blocks_high = max_block_high(xd, mbmi->sb_type, 0); const int max_blocks_wide = max_block_wide(xd, mbmi->sb_type, 0); - int ctx = txfm_partition_context(xd->above_txfm_context + tx_col, - xd->left_txfm_context + tx_row, + int ctx = txfm_partition_context(xd->above_txfm_context + blk_col, + xd->left_txfm_context + blk_row, mbmi->sb_type, tx_size); if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return; if (depth == MAX_VARTX_DEPTH) { - txfm_partition_update(xd->above_txfm_context + tx_col, - xd->left_txfm_context + tx_row, tx_size, tx_size); + txfm_partition_update(xd->above_txfm_context + blk_col, + xd->left_txfm_context + blk_row, tx_size, tx_size); return; } if (tx_size == mbmi->inter_tx_size[tx_row][tx_col]) { aom_write(w, 0, cm->fc->txfm_partition_prob[ctx]); - txfm_partition_update(xd->above_txfm_context + tx_col, - xd->left_txfm_context + tx_row, tx_size, tx_size); + txfm_partition_update(xd->above_txfm_context + blk_col, + xd->left_txfm_context + blk_row, tx_size, tx_size); } else { const TX_SIZE sub_txs = sub_tx_size_map[tx_size]; const int bsl = tx_size_wide_unit[sub_txs]; @@ -433,8 +386,8 @@ static void write_tx_size_vartx(const AV1_COMMON *cm, const MACROBLOCKD *xd, aom_write(w, 1, cm->fc->txfm_partition_prob[ctx]); if (tx_size == TX_8X8) { - txfm_partition_update(xd->above_txfm_context + tx_col, - xd->left_txfm_context + tx_row, sub_txs, tx_size); + txfm_partition_update(xd->above_txfm_context + blk_col, + xd->left_txfm_context + blk_row, sub_txs, tx_size); return; } @@ -482,22 +435,18 @@ static void write_selected_tx_size(const AV1_COMMON *cm, const MACROBLOCKD *xd, const int depth = tx_size_to_depth(coded_tx_size); #if CONFIG_EXT_TX && CONFIG_RECT_TX assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed(xd, mbmi))); - assert( - IMPLIES(is_rect_tx(tx_size), tx_size == max_txsize_rect_lookup[bsize])); #endif // CONFIG_EXT_TX && CONFIG_RECT_TX -#if CONFIG_EC_MULTISYMBOL aom_write_symbol(w, depth, ec_ctx->tx_size_cdf[tx_size_cat][tx_size_ctx], tx_size_cat + 2); -#else - av1_write_token(w, av1_tx_size_tree[tx_size_cat], - ec_ctx->tx_size_probs[tx_size_cat][tx_size_ctx], - &tx_size_encodings[tx_size_cat][depth]); -#endif +#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT + if (is_quarter_tx_allowed(xd, mbmi, is_inter) && tx_size != coded_tx_size) + aom_write(w, tx_size == quarter_txsize_lookup[bsize], + cm->fc->quarter_tx_size_prob); +#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT } } -#if CONFIG_REF_MV static void update_inter_mode_probs(AV1_COMMON *cm, aom_writer *w, FRAME_COUNTS *counts) { int i; @@ -519,7 +468,6 @@ static void update_inter_mode_probs(AV1_COMMON *cm, aom_writer *w, av1_cond_prob_diff_update(w, &cm->fc->drl_prob[i], counts->drl_mode[i], probwt); } -#endif #if CONFIG_EXT_INTER static void update_inter_compound_mode_probs(AV1_COMMON *cm, int probwt, @@ -598,17 +546,8 @@ static void write_delta_qindex(const AV1_COMMON *cm, const MACROBLOCKD *xd, (void)xd; #endif -#if CONFIG_EC_MULTISYMBOL aom_write_symbol(w, AOMMIN(abs, DELTA_Q_SMALL), 
ec_ctx->delta_q_cdf, DELTA_Q_PROBS + 1); -#else - int i = 0; - while (i < DELTA_Q_SMALL && i <= abs) { - int bit = (i < abs); - aom_write(w, bit, ec_ctx->delta_q_prob[i]); - i++; - } -#endif if (!smallval) { rem_bits = OD_ILOG_NZ(abs - 1) - 1; @@ -655,17 +594,8 @@ static void write_delta_lflevel(const AV1_COMMON *cm, const MACROBLOCKD *xd, (void)xd; #endif -#if CONFIG_EC_MULTISYMBOL aom_write_symbol(w, AOMMIN(abs, DELTA_LF_SMALL), ec_ctx->delta_lf_cdf, DELTA_LF_PROBS + 1); -#else - int i = 0; - while (i < DELTA_LF_SMALL && i <= abs) { - int bit = (i < abs); - aom_write(w, bit, ec_ctx->delta_lf_prob[i]); - i++; - } -#endif // CONFIG_EC_MULTISYMBOL if (!smallval) { rem_bits = OD_ILOG_NZ(abs - 1) - 1; @@ -908,7 +838,7 @@ static INLINE void write_coeff_extra(const aom_prob *pb, int value, } #endif -#if CONFIG_NEW_TOKENSET && !CONFIG_LV_MAP +#if !CONFIG_LV_MAP static void pack_mb_tokens(aom_writer *w, const TOKENEXTRA **tp, const TOKENEXTRA *const stop, aom_bit_depth_t bit_depth, const TX_SIZE tx_size, @@ -921,18 +851,22 @@ static void pack_mb_tokens(aom_writer *w, const TOKENEXTRA **tp, while (p < stop && p->token != EOSB_TOKEN) { const int token = p->token; + const int eob_val = p->eob_val; if (token == BLOCK_Z_TOKEN) { aom_write_symbol(w, 0, *p->head_cdf, HEAD_TOKENS + 1); p++; +#if CONFIG_VAR_TX + break; +#endif continue; } const av1_extra_bit *const extra_bits = &av1_extra_bits[token]; - if (p->eob_val == LAST_EOB) { + if (eob_val == LAST_EOB) { // Just code a flag indicating whether the value is >1 or 1. aom_write_bit(w, token != ONE_TOKEN); } else { - int comb_symb = 2 * AOMMIN(token, TWO_TOKEN) - p->eob_val + p->first_val; + int comb_symb = 2 * AOMMIN(token, TWO_TOKEN) - eob_val + p->first_val; aom_write_symbol(w, comb_symb, *p->head_cdf, HEAD_TOKENS + p->first_val); } if (token > ONE_TOKEN) { @@ -966,104 +900,13 @@ static void pack_mb_tokens(aom_writer *w, const TOKENEXTRA **tp, #if CONFIG_VAR_TX ++count; - if (token == EOB_TOKEN || count == seg_eob) break; -#endif - } - - *tp = p; -} -#else // CONFIG_NEW_TOKENSET -#if !CONFIG_LV_MAP -static void pack_mb_tokens(aom_writer *w, const TOKENEXTRA **tp, - const TOKENEXTRA *const stop, - aom_bit_depth_t bit_depth, const TX_SIZE tx_size, - TOKEN_STATS *token_stats) { - const TOKENEXTRA *p = *tp; -#if CONFIG_VAR_TX - int count = 0; - const int seg_eob = tx_size_2d[tx_size]; -#endif - - while (p < stop && p->token != EOSB_TOKEN) { - const int token = p->token; -#if !CONFIG_EC_MULTISYMBOL - const struct av1_token *const coef_encoding = &av1_coef_encodings[token]; - int coef_value = coef_encoding->value; - int coef_length = coef_encoding->len; -#endif // !CONFIG_EC_MULTISYMBOL - const av1_extra_bit *const extra_bits = &av1_extra_bits[token]; - -#if CONFIG_EC_MULTISYMBOL - /* skip one or two nodes */ - if (!p->skip_eob_node) - aom_write_record(w, token != EOB_TOKEN, p->context_tree[0], token_stats); - if (token != EOB_TOKEN) { - aom_write_record(w, token != ZERO_TOKEN, p->context_tree[1], token_stats); - if (token != ZERO_TOKEN) { - aom_write_symbol(w, token - ONE_TOKEN, *p->token_cdf, - CATEGORY6_TOKEN - ONE_TOKEN + 1); - } - } -#else - /* skip one or two nodes */ - if (p->skip_eob_node) - coef_length -= p->skip_eob_node; - else - aom_write_record(w, token != EOB_TOKEN, p->context_tree[0], token_stats); - - if (token != EOB_TOKEN) { - aom_write_record(w, token != ZERO_TOKEN, p->context_tree[1], token_stats); - - if (token != ZERO_TOKEN) { - aom_write_record(w, token != ONE_TOKEN, p->context_tree[2], - token_stats); - - if (token != ONE_TOKEN) { - 
const int unconstrained_len = UNCONSTRAINED_NODES - p->skip_eob_node; - aom_write_tree_record( - w, av1_coef_con_tree, - av1_pareto8_full[p->context_tree[PIVOT_NODE] - 1], coef_value, - coef_length - unconstrained_len, 0, token_stats); - } - } - } -#endif // CONFIG_EC_MULTISYMBOL - - if (extra_bits->base_val) { - const int bit_string = p->extra; - const int bit_string_length = extra_bits->len; // Length of extra bits to - // be written excluding - // the sign bit. - int skip_bits = (extra_bits->base_val == CAT6_MIN_VAL) - ? (int)sizeof(av1_cat6_prob) - - av1_get_cat6_extrabits_size(tx_size, bit_depth) - : 0; - - assert(!(bit_string >> (bit_string_length - skip_bits + 1))); - if (bit_string_length > 0) { -#if CONFIG_NEW_MULTISYMBOL - skip_bits &= ~3; - write_coeff_extra(extra_bits->cdf, bit_string >> 1, - bit_string_length - skip_bits, w); -#else - write_coeff_extra(extra_bits->prob, bit_string >> 1, bit_string_length, - skip_bits, w, token_stats); -#endif - } - aom_write_bit_record(w, bit_string & 1, token_stats); - } - ++p; - -#if CONFIG_VAR_TX - ++count; - if (token == EOB_TOKEN || count == seg_eob) break; + if (eob_val == EARLY_EOB || count == seg_eob) break; #endif } *tp = p; } #endif // !CONFIG_LV_MAP -#endif // CONFIG_NEW_TOKENSET #else // !CONFIG_PVQ static PVQ_INFO *get_pvq_block(PVQ_QUEUE *pvq_q) { PVQ_INFO *pvq; @@ -1150,6 +993,80 @@ static void pack_pvq_tokens(aom_writer *w, MACROBLOCK *const x, #endif // !CONFIG_PVG #if CONFIG_VAR_TX && !CONFIG_COEF_INTERLEAVE +#if CONFIG_LV_MAP +static void pack_txb_tokens(aom_writer *w, +#if CONFIG_LV_MAP + AV1_COMMON *cm, +#endif // CONFIG_LV_MAP + const TOKENEXTRA **tp, + const TOKENEXTRA *const tok_end, +#if CONFIG_PVQ || CONFIG_LV_MAP + MACROBLOCK *const x, +#endif + MACROBLOCKD *xd, MB_MODE_INFO *mbmi, int plane, + BLOCK_SIZE plane_bsize, aom_bit_depth_t bit_depth, + int block, int blk_row, int blk_col, + TX_SIZE tx_size, TOKEN_STATS *token_stats) { + const struct macroblockd_plane *const pd = &xd->plane[plane]; + const BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; + const int tx_row = blk_row >> (1 - pd->subsampling_y); + const int tx_col = blk_col >> (1 - pd->subsampling_x); + TX_SIZE plane_tx_size; + const int max_blocks_high = max_block_high(xd, plane_bsize, plane); + const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane); + + if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return; + + plane_tx_size = + plane ? 
uv_txsize_lookup[bsize][mbmi->inter_tx_size[tx_row][tx_col]][0][0] + : mbmi->inter_tx_size[tx_row][tx_col]; + + if (tx_size == plane_tx_size) { + TOKEN_STATS tmp_token_stats; + init_token_stats(&tmp_token_stats); + +#if !CONFIG_PVQ + tran_low_t *tcoeff = BLOCK_OFFSET(x->mbmi_ext->tcoeff[plane], block); + uint16_t eob = x->mbmi_ext->eobs[plane][block]; + TXB_CTX txb_ctx = { x->mbmi_ext->txb_skip_ctx[plane][block], + x->mbmi_ext->dc_sign_ctx[plane][block] }; + av1_write_coeffs_txb(cm, xd, w, block, plane, tcoeff, eob, &txb_ctx); +#else + pack_pvq_tokens(w, x, xd, plane, bsize, tx_size); +#endif +#if CONFIG_RD_DEBUG + token_stats->txb_coeff_cost_map[blk_row][blk_col] = tmp_token_stats.cost; + token_stats->cost += tmp_token_stats.cost; +#endif + } else { + const TX_SIZE sub_txs = sub_tx_size_map[tx_size]; + const int bsl = tx_size_wide_unit[sub_txs]; + int i; + + assert(bsl > 0); + + for (i = 0; i < 4; ++i) { + const int offsetr = blk_row + (i >> 1) * bsl; + const int offsetc = blk_col + (i & 0x01) * bsl; + const int step = tx_size_wide_unit[sub_txs] * tx_size_high_unit[sub_txs]; + + if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue; + + pack_txb_tokens(w, +#if CONFIG_LV_MAP + cm, +#endif + tp, tok_end, +#if CONFIG_PVQ || CONFIG_LV_MAP + x, +#endif + xd, mbmi, plane, plane_bsize, bit_depth, block, offsetr, + offsetc, sub_txs, token_stats); + block += step; + } + } +} +#else // CONFIG_LV_MAP static void pack_txb_tokens(aom_writer *w, const TOKENEXTRA **tp, const TOKENEXTRA *const tok_end, #if CONFIG_PVQ @@ -1209,16 +1126,13 @@ static void pack_txb_tokens(aom_writer *w, const TOKENEXTRA **tp, } } } -#endif +#endif // CONFIG_LV_MAP +#endif // CONFIG_VAR_TX static void write_segment_id(aom_writer *w, const struct segmentation *seg, struct segmentation_probs *segp, int segment_id) { if (seg->enabled && seg->update_map) { -#if CONFIG_EC_MULTISYMBOL aom_write_symbol(w, segment_id, segp->tree_cdf, MAX_SEGMENTS); -#else - aom_write_tree(w, av1_segment_tree, segp->tree_probs, segment_id, 3, 0); -#endif } } @@ -1242,7 +1156,7 @@ static void write_ref_frames(const AV1_COMMON *cm, const MACROBLOCKD *xd, #if SUB8X8_COMP_REF aom_write(w, is_compound, av1_get_reference_mode_prob(cm, xd)); #else - if (mbmi->sb_type >= BLOCK_8X8) + if (mbmi->sb_type != BLOCK_4X4) aom_write(w, is_compound, av1_get_reference_mode_prob(cm, xd)); #endif } else { @@ -1307,7 +1221,9 @@ static void write_ref_frames(const AV1_COMMON *cm, const MACROBLOCKD *xd, #if CONFIG_FILTER_INTRA static void write_filter_intra_mode_info(const AV1_COMMON *const cm, + const MACROBLOCKD *xd, const MB_MODE_INFO *const mbmi, + int mi_row, int mi_col, aom_writer *w) { if (mbmi->mode == DC_PRED #if CONFIG_PALETTE @@ -1323,6 +1239,17 @@ static void write_filter_intra_mode_info(const AV1_COMMON *const cm, } } +#if CONFIG_CB4X4 + if (!is_chroma_reference(mi_row, mi_col, mbmi->sb_type, + xd->plane[1].subsampling_x, + xd->plane[1].subsampling_y)) + return; +#else + (void)xd; + (void)mi_row; + (void)mi_col; +#endif // CONFIG_CB4X4 + if (mbmi->uv_mode == DC_PRED #if CONFIG_PALETTE && mbmi->palette_mode_info.palette_size[1] == 0 @@ -1358,15 +1285,9 @@ static void write_intra_angle_info(const MACROBLOCKD *xd, #if CONFIG_INTRA_INTERP p_angle = mode_to_angle_map[mbmi->mode] + mbmi->angle_delta[0] * ANGLE_STEP; if (av1_is_intra_filter_switchable(p_angle)) { -#if CONFIG_EC_MULTISYMBOL aom_write_symbol(w, mbmi->intra_filter, ec_ctx->intra_filter_cdf[intra_filter_ctx], INTRA_FILTERS); -#else - av1_write_token(w, av1_intra_filter_tree, - 
ec_ctx->intra_filter_probs[intra_filter_ctx], - &intra_filter_encodings[mbmi->intra_filter]); -#endif // CONFIG_EC_MULTISYMBOL } #endif // CONFIG_INTRA_INTERP } @@ -1409,15 +1330,9 @@ static void write_mb_interp_filter(AV1_COMP *cpi, const MACROBLOCKD *xd, (mbmi->ref_frame[1] > INTRA_FRAME && has_subpel_mv_component(xd->mi[0], xd, dir + 2))) { const int ctx = av1_get_pred_context_switchable_interp(xd, dir); -#if CONFIG_EC_MULTISYMBOL aom_write_symbol(w, av1_switchable_interp_ind[mbmi->interp_filter[dir]], ec_ctx->switchable_interp_cdf[ctx], SWITCHABLE_FILTERS); -#else - av1_write_token(w, av1_switchable_interp_tree, - ec_ctx->switchable_interp_prob[ctx], - &switchable_interp_encodings[mbmi->interp_filter[dir]]); -#endif ++cpi->interp_filter_selected[0][mbmi->interp_filter[dir]]; } else { assert(mbmi->interp_filter[dir] == EIGHTTAP_REGULAR); @@ -1426,14 +1341,8 @@ static void write_mb_interp_filter(AV1_COMP *cpi, const MACROBLOCKD *xd, #else { const int ctx = av1_get_pred_context_switchable_interp(xd); -#if CONFIG_EC_MULTISYMBOL aom_write_symbol(w, av1_switchable_interp_ind[mbmi->interp_filter], ec_ctx->switchable_interp_cdf[ctx], SWITCHABLE_FILTERS); -#else - av1_write_token(w, av1_switchable_interp_tree, - ec_ctx->switchable_interp_prob[ctx], - &switchable_interp_encodings[mbmi->interp_filter]); -#endif ++cpi->interp_filter_selected[0][mbmi->interp_filter]; } #endif // CONFIG_DUAL_FILTER @@ -1442,48 +1351,91 @@ static void write_mb_interp_filter(AV1_COMP *cpi, const MACROBLOCKD *xd, #if CONFIG_PALETTE #if CONFIG_PALETTE_DELTA_ENCODING -// Write luma palette color values with delta encoding. Write the first value as -// literal, and the deltas between each value and the previous one. The luma -// palette is sorted so each delta is larger than 0. -static void write_palette_colors_y(const PALETTE_MODE_INFO *const pmi, - int bit_depth, aom_writer *w) { - const int n = pmi->palette_size[0]; - int min_bits, i; - int bits = av1_get_palette_delta_bits_y(pmi, bit_depth, &min_bits); +// Transmit color values with delta encoding. Write the first value as +// literal, and the deltas between each value and the previous one. "min_val" is +// the smallest possible value of the deltas. +static void delta_encode_palette_colors(const int *colors, int num, + int bit_depth, int min_val, + aom_writer *w) { + if (num <= 0) return; + assert(colors[0] < (1 << bit_depth)); + aom_write_literal(w, colors[0], bit_depth); + if (num == 1) return; + int max_delta = 0; + int deltas[PALETTE_MAX_SIZE]; + memset(deltas, 0, sizeof(deltas)); + for (int i = 1; i < num; ++i) { + assert(colors[i] < (1 << bit_depth)); + const int delta = colors[i] - colors[i - 1]; + deltas[i - 1] = delta; + assert(delta >= min_val); + if (delta > max_delta) max_delta = delta; + } + const int min_bits = bit_depth - 3; + int bits = AOMMAX(av1_ceil_log2(max_delta + 1 - min_val), min_bits); + assert(bits <= bit_depth); + int range = (1 << bit_depth) - colors[0] - min_val; aom_write_literal(w, bits - min_bits, 2); - aom_write_literal(w, pmi->palette_colors[0], bit_depth); - for (i = 1; i < n; ++i) { - aom_write_literal( - w, pmi->palette_colors[i] - pmi->palette_colors[i - 1] - 1, bits); - bits = - AOMMIN(bits, av1_ceil_log2((1 << bit_depth) - pmi->palette_colors[i])); + for (int i = 0; i < num - 1; ++i) { + aom_write_literal(w, deltas[i] - min_val, bits); + range -= deltas[i]; + bits = AOMMIN(bits, av1_ceil_log2(range)); } } -// Write chroma palette color values. Use delta encoding for u channel as its -// palette is sorted. 
For v channel, either use delta encoding or transmit -// raw values directly, whichever costs less. -static void write_palette_colors_uv(const PALETTE_MODE_INFO *const pmi, +// Transmit luma palette color values. First signal if each color in the color +// cache is used. Those colors that are not in the cache are transmitted with +// delta encoding. +static void write_palette_colors_y(const MACROBLOCKD *const xd, + const PALETTE_MODE_INFO *const pmi, + int bit_depth, aom_writer *w) { + const int n = pmi->palette_size[0]; + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + uint16_t color_cache[2 * PALETTE_MAX_SIZE]; + const int n_cache = av1_get_palette_cache(above_mi, left_mi, 0, color_cache); + int out_cache_colors[PALETTE_MAX_SIZE]; + uint8_t cache_color_found[2 * PALETTE_MAX_SIZE]; + const int n_out_cache = + av1_index_color_cache(color_cache, n_cache, pmi->palette_colors, n, + cache_color_found, out_cache_colors); + int n_in_cache = 0; + for (int i = 0; i < n_cache && n_in_cache < n; ++i) { + const int found = cache_color_found[i]; + aom_write_bit(w, found); + n_in_cache += found; + } + assert(n_in_cache + n_out_cache == n); + delta_encode_palette_colors(out_cache_colors, n_out_cache, bit_depth, 1, w); +} + +// Write chroma palette color values. U channel is handled similarly to the luma +// channel. For v channel, either use delta encoding or transmit raw values +// directly, whichever costs less. +static void write_palette_colors_uv(const MACROBLOCKD *const xd, + const PALETTE_MODE_INFO *const pmi, int bit_depth, aom_writer *w) { - int i; const int n = pmi->palette_size[1]; -#if CONFIG_HIGHBITDEPTH const uint16_t *colors_u = pmi->palette_colors + PALETTE_MAX_SIZE; const uint16_t *colors_v = pmi->palette_colors + 2 * PALETTE_MAX_SIZE; -#else - const uint8_t *colors_u = pmi->palette_colors + PALETTE_MAX_SIZE; - const uint8_t *colors_v = pmi->palette_colors + 2 * PALETTE_MAX_SIZE; -#endif // CONFIG_HIGHBITDEPTH // U channel colors. - int min_bits_u = 0; - int bits_u = av1_get_palette_delta_bits_u(pmi, bit_depth, &min_bits_u); - aom_write_literal(w, bits_u - min_bits_u, 2); - aom_write_literal(w, colors_u[0], bit_depth); - for (i = 1; i < n; ++i) { - aom_write_literal(w, colors_u[i] - colors_u[i - 1], bits_u); - bits_u = AOMMIN(bits_u, av1_ceil_log2(1 + (1 << bit_depth) - colors_u[i])); - } - // V channel colors. + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + uint16_t color_cache[2 * PALETTE_MAX_SIZE]; + const int n_cache = av1_get_palette_cache(above_mi, left_mi, 1, color_cache); + int out_cache_colors[PALETTE_MAX_SIZE]; + uint8_t cache_color_found[2 * PALETTE_MAX_SIZE]; + const int n_out_cache = av1_index_color_cache( + color_cache, n_cache, colors_u, n, cache_color_found, out_cache_colors); + int n_in_cache = 0; + for (int i = 0; i < n_cache && n_in_cache < n; ++i) { + const int found = cache_color_found[i]; + aom_write_bit(w, found); + n_in_cache += found; + } + delta_encode_palette_colors(out_cache_colors, n_out_cache, bit_depth, 0, w); + + // V channel colors. Don't use color cache as the colors are not sorted. 
const int max_val = 1 << bit_depth; int zero_count = 0, min_bits_v = 0; int bits_v = @@ -1492,10 +1444,12 @@ static void write_palette_colors_uv(const PALETTE_MODE_INFO *const pmi, 2 + bit_depth + (bits_v + 1) * (n - 1) - zero_count; const int rate_using_raw = bit_depth * n; if (rate_using_delta < rate_using_raw) { // delta encoding + assert(colors_v[0] < (1 << bit_depth)); aom_write_bit(w, 1); aom_write_literal(w, bits_v - min_bits_v, 2); aom_write_literal(w, colors_v[0], bit_depth); - for (i = 1; i < n; ++i) { + for (int i = 1; i < n; ++i) { + assert(colors_v[i] < (1 << bit_depth)); if (colors_v[i] == colors_v[i - 1]) { // No need to signal sign bit. aom_write_literal(w, 0, bits_v); continue; @@ -1512,7 +1466,10 @@ static void write_palette_colors_uv(const PALETTE_MODE_INFO *const pmi, } } else { // Transmit raw values. aom_write_bit(w, 0); - for (i = 0; i < n; ++i) aom_write_literal(w, colors_v[i], bit_depth); + for (int i = 0; i < n; ++i) { + assert(colors_v[i] < (1 << bit_depth)); + aom_write_literal(w, colors_v[i], bit_depth); + } } } #endif // CONFIG_PALETTE_DELTA_ENCODING @@ -1542,11 +1499,12 @@ static void write_palette_mode_info(const AV1_COMMON *cm, const MACROBLOCKD *xd, av1_default_palette_y_size_prob[bsize - BLOCK_8X8], &palette_size_encodings[n - PALETTE_MIN_SIZE]); #if CONFIG_PALETTE_DELTA_ENCODING - write_palette_colors_y(pmi, cm->bit_depth, w); + write_palette_colors_y(xd, pmi, cm->bit_depth, w); #else - int i; - for (i = 0; i < n; ++i) + for (int i = 0; i < n; ++i) { + assert(pmi->palette_colors[i] < (1 << cm->bit_depth)); aom_write_literal(w, pmi->palette_colors[i], cm->bit_depth); + } #endif // CONFIG_PALETTE_DELTA_ENCODING write_uniform(w, n, pmi->palette_first_color_idx[0]); } @@ -1561,10 +1519,13 @@ static void write_palette_mode_info(const AV1_COMMON *cm, const MACROBLOCKD *xd, av1_default_palette_uv_size_prob[bsize - BLOCK_8X8], &palette_size_encodings[n - PALETTE_MIN_SIZE]); #if CONFIG_PALETTE_DELTA_ENCODING - write_palette_colors_uv(pmi, cm->bit_depth, w); + write_palette_colors_uv(xd, pmi, cm->bit_depth, w); #else - int i; - for (i = 0; i < n; ++i) { + for (int i = 0; i < n; ++i) { + assert(pmi->palette_colors[PALETTE_MAX_SIZE + i] < + (1 << cm->bit_depth)); + assert(pmi->palette_colors[2 * PALETTE_MAX_SIZE + i] < + (1 << cm->bit_depth)); aom_write_literal(w, pmi->palette_colors[PALETTE_MAX_SIZE + i], cm->bit_depth); aom_write_literal(w, pmi->palette_colors[2 * PALETTE_MAX_SIZE + i], @@ -1625,30 +1586,17 @@ void av1_write_tx_type(const AV1_COMMON *const cm, const MACROBLOCKD *xd, if (is_inter) { assert(ext_tx_used_inter[eset][tx_type]); if (eset > 0) { -#if CONFIG_EC_MULTISYMBOL aom_write_symbol(w, av1_ext_tx_inter_ind[eset][tx_type], ec_ctx->inter_ext_tx_cdf[eset][square_tx_size], ext_tx_cnt_inter[eset]); -#else - av1_write_token(w, av1_ext_tx_inter_tree[eset], - ec_ctx->inter_ext_tx_prob[eset][square_tx_size], - &ext_tx_inter_encodings[eset][tx_type]); -#endif } } else if (ALLOW_INTRA_EXT_TX) { assert(ext_tx_used_intra[eset][tx_type]); if (eset > 0) { -#if CONFIG_EC_MULTISYMBOL aom_write_symbol( w, av1_ext_tx_intra_ind[eset][tx_type], ec_ctx->intra_ext_tx_cdf[eset][square_tx_size][mbmi->mode], ext_tx_cnt_intra[eset]); -#else - av1_write_token( - w, av1_ext_tx_intra_tree[eset], - ec_ctx->intra_ext_tx_prob[eset][square_tx_size][mbmi->mode], - &ext_tx_intra_encodings[eset][tx_type]); -#endif } } } @@ -1662,28 +1610,14 @@ void av1_write_tx_type(const AV1_COMMON *const cm, const MACROBLOCKD *xd, #endif // CONFIG_SUPERTX !segfeature_active(&cm->seg, 
mbmi->segment_id, SEG_LVL_SKIP)) { if (is_inter) { -#if CONFIG_EC_MULTISYMBOL aom_write_symbol(w, av1_ext_tx_ind[tx_type], ec_ctx->inter_ext_tx_cdf[tx_size], TX_TYPES); -#else - av1_write_token(w, av1_ext_tx_tree, ec_ctx->inter_ext_tx_prob[tx_size], - &ext_tx_encodings[tx_type]); -#endif } else { -#if CONFIG_EC_MULTISYMBOL aom_write_symbol( w, av1_ext_tx_ind[tx_type], ec_ctx->intra_ext_tx_cdf[tx_size] [intra_mode_to_tx_type_context[mbmi->mode]], TX_TYPES); -#else - av1_write_token( - w, av1_ext_tx_tree, - ec_ctx - ->intra_ext_tx_prob[tx_size] - [intra_mode_to_tx_type_context[mbmi->mode]], - &ext_tx_encodings[tx_type]); -#endif } } #endif // CONFIG_EXT_TX @@ -1692,29 +1626,45 @@ void av1_write_tx_type(const AV1_COMMON *const cm, const MACROBLOCKD *xd, static void write_intra_mode(FRAME_CONTEXT *frame_ctx, BLOCK_SIZE bsize, PREDICTION_MODE mode, aom_writer *w) { -#if CONFIG_EC_MULTISYMBOL aom_write_symbol(w, av1_intra_mode_ind[mode], frame_ctx->y_mode_cdf[size_group_lookup[bsize]], INTRA_MODES); -#else - av1_write_token(w, av1_intra_mode_tree, - frame_ctx->y_mode_prob[size_group_lookup[bsize]], - &intra_mode_encodings[mode]); -#endif } static void write_intra_uv_mode(FRAME_CONTEXT *frame_ctx, PREDICTION_MODE uv_mode, PREDICTION_MODE y_mode, aom_writer *w) { -#if CONFIG_EC_MULTISYMBOL aom_write_symbol(w, av1_intra_mode_ind[uv_mode], frame_ctx->uv_mode_cdf[y_mode], INTRA_MODES); -#else - av1_write_token(w, av1_intra_mode_tree, frame_ctx->uv_mode_prob[y_mode], - &intra_mode_encodings[uv_mode]); -#endif } +#if CONFIG_CFL +static void write_cfl_alphas(FRAME_CONTEXT *const frame_ctx, int skip, int ind, + const CFL_SIGN_TYPE signs[CFL_SIGNS], + aom_writer *w) { + if (skip) { + assert(ind == 0); + assert(signs[CFL_PRED_U] == CFL_SIGN_POS); + assert(signs[CFL_PRED_V] == CFL_SIGN_POS); + } else { + // Check for uninitialized signs + if (cfl_alpha_codes[ind][CFL_PRED_U] == 0) + assert(signs[CFL_PRED_U] == CFL_SIGN_POS); + if (cfl_alpha_codes[ind][CFL_PRED_V] == 0) + assert(signs[CFL_PRED_V] == CFL_SIGN_POS); + + // Write a symbol representing a combination of alpha Cb and alpha Cr. + aom_write_symbol(w, ind, frame_ctx->cfl_alpha_cdf, CFL_ALPHABET_SIZE); + + // Signs are only signaled for nonzero codes. 
+ if (cfl_alpha_codes[ind][CFL_PRED_U] != 0) + aom_write_bit(w, signs[CFL_PRED_U]); + if (cfl_alpha_codes[ind][CFL_PRED_V] != 0) + aom_write_bit(w, signs[CFL_PRED_V]); + } +} +#endif + static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row, const int mi_col, #if CONFIG_SUPERTX @@ -1734,9 +1684,6 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row, #else FRAME_CONTEXT *ec_ctx = cm->fc; #endif -#if !CONFIG_REF_MV - nmv_context *nmvc = &ec_ctx->nmvc; -#endif const MODE_INFO *mi = xd->mi[0]; const struct segmentation *const seg = &cm->seg; @@ -1859,12 +1806,23 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row, } #if CONFIG_CB4X4 if (is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x, - xd->plane[1].subsampling_y)) + xd->plane[1].subsampling_y)) { write_intra_uv_mode(ec_ctx, mbmi->uv_mode, mode, w); #else // !CONFIG_CB4X4 write_intra_uv_mode(ec_ctx, mbmi->uv_mode, mode, w); #endif // CONFIG_CB4X4 +#if CONFIG_CFL + if (mbmi->uv_mode == DC_PRED) { + write_cfl_alphas(ec_ctx, mbmi->skip, mbmi->cfl_alpha_idx, + mbmi->cfl_alpha_signs, w); + } +#endif + +#if CONFIG_CB4X4 + } +#endif + #if CONFIG_EXT_INTRA write_intra_angle_info(xd, ec_ctx, w); #endif // CONFIG_EXT_INTRA @@ -1874,13 +1832,12 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row, #endif // CONFIG_PALETTE #if CONFIG_FILTER_INTRA if (bsize >= BLOCK_8X8 || unify_bsize) - write_filter_intra_mode_info(cm, mbmi, w); + write_filter_intra_mode_info(cm, xd, mbmi, mi_row, mi_col, w); #endif // CONFIG_FILTER_INTRA } else { int16_t mode_ctx; write_ref_frames(cm, xd, w); -#if CONFIG_REF_MV #if CONFIG_EXT_INTER if (is_compound) mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]]; @@ -1888,9 +1845,6 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row, #endif // CONFIG_EXT_INTER mode_ctx = av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame, bsize, -1); -#else // CONFIG_REF_MV - mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]]; -#endif // CONFIG_REF_MV // If segment skip is not enabled code the mode. 
if (!segfeature_active(seg, segment_id, SEG_LVL_SKIP)) { @@ -1902,7 +1856,6 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row, #endif // CONFIG_EXT_INTER write_inter_mode(w, mode, ec_ctx, mode_ctx); -#if CONFIG_REF_MV #if CONFIG_EXT_INTER if (mode == NEWMV || mode == NEW_NEWMV || have_nearmv_in_inter_mode(mode)) @@ -1912,7 +1865,6 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row, write_drl_idx(cm, mbmi, mbmi_ext, w); else assert(mbmi->ref_mv_idx == 0); -#endif } } @@ -1928,13 +1880,11 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row, for (idx = 0; idx < 2; idx += num_4x4_w) { const int j = idy * 2 + idx; const PREDICTION_MODE b_mode = mi->bmi[j].as_mode; -#if CONFIG_REF_MV #if CONFIG_EXT_INTER if (!is_compound) #endif // CONFIG_EXT_INTER mode_ctx = av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame, bsize, j); -#endif #if CONFIG_EXT_INTER if (is_inter_compound_mode(b_mode)) write_inter_compound_mode(cm, w, b_mode, mode_ctx); @@ -1948,45 +1898,35 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row, if (b_mode == NEWMV) { #endif // CONFIG_EXT_INTER for (ref = 0; ref < 1 + is_compound; ++ref) { -#if CONFIG_REF_MV int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame); int nmv_ctx = av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type], mbmi_ext->ref_mv_stack[rf_type], ref, mbmi->ref_mv_idx); nmv_context *nmvc = &ec_ctx->nmvc[nmv_ctx]; -#endif av1_encode_mv(cpi, w, &mi->bmi[j].as_mv[ref].as_mv, #if CONFIG_EXT_INTER &mi->bmi[j].ref_mv[ref].as_mv, #else -#if CONFIG_REF_MV &mi->bmi[j].pred_mv[ref].as_mv, -#else - &mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0].as_mv, -#endif // CONFIG_REF_MV #endif // CONFIG_EXT_INTER nmvc, allow_hp); } } #if CONFIG_EXT_INTER else if (b_mode == NEAREST_NEWMV || b_mode == NEAR_NEWMV) { -#if CONFIG_REF_MV int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame); int nmv_ctx = av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type], mbmi_ext->ref_mv_stack[rf_type], 1, mbmi->ref_mv_idx); nmv_context *nmvc = &ec_ctx->nmvc[nmv_ctx]; -#endif av1_encode_mv(cpi, w, &mi->bmi[j].as_mv[1].as_mv, &mi->bmi[j].ref_mv[1].as_mv, nmvc, allow_hp); } else if (b_mode == NEW_NEARESTMV || b_mode == NEW_NEARMV) { -#if CONFIG_REF_MV int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame); int nmv_ctx = av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type], mbmi_ext->ref_mv_stack[rf_type], 0, mbmi->ref_mv_idx); nmv_context *nmvc = &ec_ctx->nmvc[nmv_ctx]; -#endif av1_encode_mv(cpi, w, &mi->bmi[j].as_mv[0].as_mv, &mi->bmi[j].ref_mv[0].as_mv, nmvc, allow_hp); } @@ -2001,37 +1941,31 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row, #endif // CONFIG_EXT_INTER int_mv ref_mv; for (ref = 0; ref < 1 + is_compound; ++ref) { -#if CONFIG_REF_MV int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame); int nmv_ctx = av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type], mbmi_ext->ref_mv_stack[rf_type], ref, mbmi->ref_mv_idx); nmv_context *nmvc = &ec_ctx->nmvc[nmv_ctx]; -#endif ref_mv = mbmi_ext->ref_mvs[mbmi->ref_frame[ref]][0]; av1_encode_mv(cpi, w, &mbmi->mv[ref].as_mv, &ref_mv.as_mv, nmvc, allow_hp); } #if CONFIG_EXT_INTER } else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) { -#if CONFIG_REF_MV int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame); int nmv_ctx = av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type], mbmi_ext->ref_mv_stack[rf_type], 1, mbmi->ref_mv_idx); nmv_context *nmvc = &ec_ctx->nmvc[nmv_ctx]; -#endif av1_encode_mv(cpi, w, &mbmi->mv[1].as_mv, &mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0].as_mv, nmvc, allow_hp); } else if (mode == NEW_NEARESTMV || 
mode == NEW_NEARMV) { -#if CONFIG_REF_MV int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame); int nmv_ctx = av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type], mbmi_ext->ref_mv_stack[rf_type], 0, mbmi->ref_mv_idx); nmv_context *nmvc = &ec_ctx->nmvc[nmv_ctx]; -#endif av1_encode_mv(cpi, w, &mbmi->mv[0].as_mv, &mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_mv, nmvc, allow_hp); @@ -2039,12 +1973,12 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row, } } -#if CONFIG_EXT_INTER +#if CONFIG_EXT_INTER && CONFIG_INTERINTRA if (cpi->common.reference_mode != COMPOUND_REFERENCE && #if CONFIG_SUPERTX !supertx_enabled && #endif // CONFIG_SUPERTX - is_interintra_allowed(mbmi)) { + cpi->common.allow_interintra_compound && is_interintra_allowed(mbmi)) { const int interintra = mbmi->ref_frame[1] == INTRA_FRAME; const int bsize_group = size_group_lookup[bsize]; aom_write(w, interintra, cm->fc->interintra_prob[bsize_group]); @@ -2062,7 +1996,7 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row, } } } -#endif // CONFIG_EXT_INTER +#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION #if CONFIG_SUPERTX @@ -2082,21 +2016,23 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row, #endif // CONFIG_MOTION_VAR && is_any_masked_compound_used(bsize)) { #if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE - av1_write_token(w, av1_compound_type_tree, - cm->fc->compound_type_prob[bsize], - &compound_type_encodings[mbmi->interinter_compound_type]); -#endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE + if (cm->allow_masked_compound) { + av1_write_token( + w, av1_compound_type_tree, cm->fc->compound_type_prob[bsize], + &compound_type_encodings[mbmi->interinter_compound_type]); #if CONFIG_WEDGE - if (mbmi->interinter_compound_type == COMPOUND_WEDGE) { - aom_write_literal(w, mbmi->wedge_index, get_wedge_bits_lookup(bsize)); - aom_write_bit(w, mbmi->wedge_sign); - } + if (mbmi->interinter_compound_type == COMPOUND_WEDGE) { + aom_write_literal(w, mbmi->wedge_index, get_wedge_bits_lookup(bsize)); + aom_write_bit(w, mbmi->wedge_sign); + } #endif // CONFIG_WEDGE #if CONFIG_COMPOUND_SEGMENT - if (mbmi->interinter_compound_type == COMPOUND_SEG) { - aom_write_literal(w, mbmi->mask_type, MAX_SEG_MASK_BITS); - } + if (mbmi->interinter_compound_type == COMPOUND_SEG) { + aom_write_literal(w, mbmi->mask_type, MAX_SEG_MASK_BITS); + } #endif // CONFIG_COMPOUND_SEGMENT + } +#endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE } #endif // CONFIG_EXT_INTER @@ -2114,15 +2050,17 @@ static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row, #endif // !CONFIG_TXK_SEL } +static void write_mb_modes_kf(AV1_COMMON *cm, #if CONFIG_DELTA_Q -static void write_mb_modes_kf(AV1_COMMON *cm, MACROBLOCKD *xd, const int mi_row, - const int mi_col, aom_writer *w) { - int skip; + MACROBLOCKD *xd, #else -static void write_mb_modes_kf(AV1_COMMON *cm, const MACROBLOCKD *xd, + const MACROBLOCKD *xd, +#endif // CONFIG_DELTA_Q +#if CONFIG_INTRABC + const MB_MODE_INFO_EXT *mbmi_ext, +#endif // CONFIG_INTRABC const int mi_row, const int mi_col, aom_writer *w) { -#endif const struct segmentation *const seg = &cm->seg; struct segmentation_probs *const segp = &cm->fc->seg; const MODE_INFO *const mi = xd->mi[0]; @@ -2147,7 +2085,7 @@ static void write_mb_modes_kf(AV1_COMMON *cm, const MACROBLOCKD *xd, if (seg->update_map) write_segment_id(w, seg, segp, mbmi->segment_id); #if CONFIG_DELTA_Q - skip = write_skip(cm, xd, mbmi->segment_id, mi, w); + const int skip = write_skip(cm, xd, mbmi->segment_id, mi, w); if 
(cm->delta_q_present_flag) { int super_block_upper_left = ((mi_row & MAX_MIB_MASK) == 0) && ((mi_col & MAX_MIB_MASK) == 0); @@ -2188,13 +2126,19 @@ static void write_mb_modes_kf(AV1_COMMON *cm, const MACROBLOCKD *xd, #if CONFIG_INTRABC if (bsize >= BLOCK_8X8 && cm->allow_screen_content_tools) { int use_intrabc = is_intrabc_block(mbmi); - aom_write(w, use_intrabc, INTRABC_PROB); + aom_write(w, use_intrabc, ec_ctx->intrabc_prob); if (use_intrabc) { assert(mbmi->mode == DC_PRED); assert(mbmi->uv_mode == DC_PRED); - int_mv dv_ref; - av1_find_ref_dv(&dv_ref, mi_row, mi_col); + int_mv dv_ref = mbmi_ext->ref_mvs[INTRA_FRAME][0]; av1_encode_dv(w, &mbmi->mv[0].as_mv, &dv_ref.as_mv, &ec_ctx->ndvc); +#if CONFIG_EXT_TX && !CONFIG_TXK_SEL + av1_write_tx_type(cm, xd, +#if CONFIG_SUPERTX + 0, +#endif + w); +#endif // CONFIG_EXT_TX && !CONFIG_TXK_SEL return; } } @@ -2218,12 +2162,22 @@ static void write_mb_modes_kf(AV1_COMMON *cm, const MACROBLOCKD *xd, #if CONFIG_CB4X4 if (is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x, - xd->plane[1].subsampling_y)) + xd->plane[1].subsampling_y)) { write_intra_uv_mode(ec_ctx, mbmi->uv_mode, mbmi->mode, w); #else // !CONFIG_CB4X4 write_intra_uv_mode(ec_ctx, mbmi->uv_mode, mbmi->mode, w); #endif // CONFIG_CB4X4 +#if CONFIG_CFL + if (mbmi->uv_mode == DC_PRED) { + write_cfl_alphas(ec_ctx, mbmi->skip, mbmi->cfl_alpha_idx, + mbmi->cfl_alpha_signs, w); + } +#endif + +#if CONFIG_CB4X4 + } +#endif #if CONFIG_EXT_INTRA write_intra_angle_info(xd, ec_ctx, w); #endif // CONFIG_EXT_INTRA @@ -2233,7 +2187,7 @@ static void write_mb_modes_kf(AV1_COMMON *cm, const MACROBLOCKD *xd, #endif // CONFIG_PALETTE #if CONFIG_FILTER_INTRA if (bsize >= BLOCK_8X8 || unify_bsize) - write_filter_intra_mode_info(cm, mbmi, w); + write_filter_intra_mode_info(cm, xd, mbmi, mi_row, mi_col, w); #endif // CONFIG_FILTER_INTRA #if !CONFIG_TXK_SEL @@ -2325,12 +2279,17 @@ static void write_mbmi_b(AV1_COMP *cpi, const TileInfo *const tile, cm->mi_rows, cm->mi_cols); if (frame_is_intra_only(cm)) { - write_mb_modes_kf(cm, xd, mi_row, mi_col, w); + write_mb_modes_kf(cm, xd, +#if CONFIG_INTRABC + cpi->td.mb.mbmi_ext, +#endif // CONFIG_INTRABC + mi_row, mi_col, w); } else { #if CONFIG_VAR_TX - xd->above_txfm_context = cm->above_txfm_context + mi_col; - xd->left_txfm_context = - xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); + xd->above_txfm_context = + cm->above_txfm_context + (mi_col << TX_UNIT_WIDE_LOG2); + xd->left_txfm_context = xd->left_txfm_context_buffer + + ((mi_row & MAX_MIB_MASK) << TX_UNIT_HIGH_LOG2); #endif #if CONFIG_DUAL_FILTER // has_subpel_mv_component needs the ref frame buffers set up to look @@ -2539,8 +2498,12 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile, const int bkh = tx_size_high_unit[max_tx_size]; for (row = 0; row < num_4x4_h; row += bkh) { for (col = 0; col < num_4x4_w; col += bkw) { - pack_txb_tokens(w, tok, tok_end, -#if CONFIG_PVQ + pack_txb_tokens(w, +#if CONFIG_LV_MAP + cm, +#endif + tok, tok_end, +#if CONFIG_PVQ || CONFIG_LV_MAP x, #endif xd, mbmi, plane, plane_bsize, cm->bit_depth, block, @@ -2556,10 +2519,10 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile, } #endif // CONFIG_RD_DEBUG } else { +#if CONFIG_LV_MAP + av1_write_coeffs_mb(cm, x, w, plane); +#else TX_SIZE tx = get_tx_size(plane, xd); -#if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2 - tx = AOMMAX(TX_4X4, tx); -#endif const int bkw = tx_size_wide_unit[tx]; const int bkh = tx_size_high_unit[tx]; for (row = 0; row < num_4x4_h; row += bkh) { @@ -2571,6 
+2534,7 @@ static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile, #endif } } +#endif // CONFIG_LV_MAP } #else TX_SIZE tx = get_tx_size(plane, xd); @@ -2727,7 +2691,7 @@ static void write_partition(const AV1_COMMON *const cm, #if CONFIG_EC_ADAPT FRAME_CONTEXT *ec_ctx = xd->tile_ctx; (void)cm; -#elif CONFIG_EC_MULTISYMBOL +#else FRAME_CONTEXT *ec_ctx = cm->fc; #endif @@ -2736,24 +2700,11 @@ static void write_partition(const AV1_COMMON *const cm, if (has_rows && has_cols) { #if CONFIG_EXT_PARTITION_TYPES if (bsize <= BLOCK_8X8) -#if CONFIG_EC_MULTISYMBOL aom_write_symbol(w, p, ec_ctx->partition_cdf[ctx], PARTITION_TYPES); -#else - av1_write_token(w, av1_partition_tree, probs, &partition_encodings[p]); -#endif else -#if CONFIG_EC_MULTISYMBOL aom_write_symbol(w, p, ec_ctx->partition_cdf[ctx], EXT_PARTITION_TYPES); #else - av1_write_token(w, av1_ext_partition_tree, probs, - &ext_partition_encodings[p]); -#endif // CONFIG_EC_MULTISYMBOL -#else -#if CONFIG_EC_MULTISYMBOL aom_write_symbol(w, p, ec_ctx->partition_cdf[ctx], PARTITION_TYPES); -#else - av1_write_token(w, av1_partition_tree, probs, &partition_encodings[p]); -#endif #endif // CONFIG_EXT_PARTITION_TYPES } else if (!has_rows && has_cols) { assert(p == PARTITION_SPLIT || p == PARTITION_HORZ); @@ -2920,7 +2871,6 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile, const int eset = get_ext_tx_set(supertx_size, bsize, 1, cm->reduced_tx_set_used); if (eset > 0) { -#if CONFIG_EC_MULTISYMBOL #if CONFIG_EC_ADAPT FRAME_CONTEXT *ec_ctx = xd->tile_ctx; #else @@ -2929,11 +2879,6 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile, aom_write_symbol(w, av1_ext_tx_inter_ind[eset][mbmi->tx_type], ec_ctx->inter_ext_tx_cdf[eset][supertx_size], ext_tx_cnt_inter[eset]); -#else - av1_write_token(w, av1_ext_tx_inter_tree[eset], - cm->fc->inter_ext_tx_prob[eset][supertx_size], - &ext_tx_inter_encodings[eset][mbmi->tx_type]); -#endif } } #else @@ -2989,21 +2934,11 @@ static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile, #endif // CONFIG_EXT_PARTITION_TYPES #if CONFIG_CDEF -#if CONFIG_EXT_PARTITION - if (cm->sb_size == BLOCK_128X128 && bsize == BLOCK_128X128 && - !sb_all_skip(cm, mi_row, mi_col)) { + if (bsize == cm->sb_size && !sb_all_skip(cm, mi_row, mi_col) && + cm->cdef_bits != 0) { aom_write_literal(w, cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col] ->mbmi.cdef_strength, cm->cdef_bits); - } else if (cm->sb_size == BLOCK_64X64 && bsize == BLOCK_64X64 && -#else - if (bsize == BLOCK_64X64 && -#endif // CONFIG_EXT_PARTITION - !sb_all_skip(cm, mi_row, mi_col)) { - if (cm->cdef_bits != 0) - aom_write_literal(w, cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col] - ->mbmi.cdef_strength, - cm->cdef_bits); } #endif } @@ -3066,7 +3001,7 @@ static void write_modes(AV1_COMP *const cpi, const TileInfo *const tile, } #if !CONFIG_LV_MAP -#if !CONFIG_PVQ && !(CONFIG_EC_ADAPT && CONFIG_NEW_TOKENSET) +#if !CONFIG_PVQ && !CONFIG_EC_ADAPT static void build_tree_distribution(AV1_COMP *cpi, TX_SIZE tx_size, av1_coeff_stats *coef_branch_ct, av1_coeff_probs_model *coef_probs) { @@ -3097,7 +3032,7 @@ static void build_tree_distribution(AV1_COMP *cpi, TX_SIZE tx_size, } } -#if !(CONFIG_EC_ADAPT && CONFIG_NEW_TOKENSET) +#if !CONFIG_EC_ADAPT static void update_coef_probs_common(aom_writer *const bc, AV1_COMP *cpi, TX_SIZE tx_size, av1_coeff_stats *frame_branch_ct, @@ -3249,235 +3184,12 @@ static void update_coef_probs_common(aom_writer *const bc, AV1_COMP *cpi, } } #endif -#if 
CONFIG_SUBFRAME_PROB_UPDATE -// Calculate the token counts between subsequent subframe updates. -static void get_coef_counts_diff( - AV1_COMP *cpi, int index, - av1_coeff_count coef_counts[TX_SIZES][PLANE_TYPES], - unsigned int eob_counts[TX_SIZES][PLANE_TYPES][REF_TYPES][COEF_BANDS] - [COEFF_CONTEXTS]) { - int i, j, k, l, m, tx_size, val; - const int max_idx = cpi->common.coef_probs_update_idx; - const TX_MODE tx_mode = cpi->common.tx_mode; - const int max_tx_size = tx_mode_to_biggest_tx_size[tx_mode]; - const SUBFRAME_STATS *subframe_stats = &cpi->subframe_stats; - - assert(max_idx < COEF_PROBS_BUFS); - - for (tx_size = 0; tx_size <= max_tx_size; ++tx_size) - for (i = 0; i < PLANE_TYPES; ++i) - for (j = 0; j < REF_TYPES; ++j) - for (k = 0; k < COEF_BANDS; ++k) - for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) { - if (index == max_idx) { - val = - cpi->common.counts.eob_branch[tx_size][i][j][k][l] - - subframe_stats->eob_counts_buf[max_idx][tx_size][i][j][k][l]; - } else { - val = subframe_stats - ->eob_counts_buf[index + 1][tx_size][i][j][k][l] - - subframe_stats->eob_counts_buf[index][tx_size][i][j][k][l]; - } - assert(val >= 0); - eob_counts[tx_size][i][j][k][l] = val; - - for (m = 0; m < ENTROPY_TOKENS; ++m) { - if (index == max_idx) { - val = cpi->td.rd_counts.coef_counts[tx_size][i][j][k][l][m] - - subframe_stats - ->coef_counts_buf[max_idx][tx_size][i][j][k][l][m]; - } else { - val = subframe_stats - ->coef_counts_buf[index + 1][tx_size][i][j][k][l][m] - - subframe_stats - ->coef_counts_buf[index][tx_size][i][j][k][l][m]; - } - assert(val >= 0); - coef_counts[tx_size][i][j][k][l][m] = val; - } - } -} - -static void update_coef_probs_subframe( - aom_writer *const bc, AV1_COMP *cpi, TX_SIZE tx_size, - av1_coeff_stats branch_ct[COEF_PROBS_BUFS][TX_SIZES][PLANE_TYPES], - av1_coeff_probs_model *new_coef_probs) { - av1_coeff_probs_model *old_coef_probs = cpi->common.fc->coef_probs[tx_size]; - const aom_prob upd = DIFF_UPDATE_PROB; - const int entropy_nodes_update = UNCONSTRAINED_NODES; - int i, j, k, l, t; - int stepsize = cpi->sf.coeff_prob_appx_step; - const int max_idx = cpi->common.coef_probs_update_idx; - int idx; - unsigned int this_branch_ct[ENTROPY_NODES][COEF_PROBS_BUFS][2]; - - switch (cpi->sf.use_fast_coef_updates) { - case TWO_LOOP: { - /* dry run to see if there is any update at all needed */ - int savings = 0; - int update[2] = { 0, 0 }; - for (i = 0; i < PLANE_TYPES; ++i) { - for (j = 0; j < REF_TYPES; ++j) { - for (k = 0; k < COEF_BANDS; ++k) { - for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) { - for (t = 0; t < ENTROPY_NODES; ++t) { - for (idx = 0; idx <= max_idx; ++idx) { - memcpy(this_branch_ct[t][idx], - branch_ct[idx][tx_size][i][j][k][l][t], - 2 * sizeof(this_branch_ct[t][idx][0])); - } - } - for (t = 0; t < entropy_nodes_update; ++t) { - aom_prob newp = new_coef_probs[i][j][k][l][t]; - const aom_prob oldp = old_coef_probs[i][j][k][l][t]; - int s, u = 0; - - if (t == PIVOT_NODE) - s = av1_prob_update_search_model_subframe( - this_branch_ct, old_coef_probs[i][j][k][l], &newp, upd, - stepsize, max_idx); - else - s = av1_prob_update_search_subframe(this_branch_ct[t], oldp, - &newp, upd, max_idx); - if (s > 0 && newp != oldp) u = 1; - if (u) - savings += s - (int)(av1_cost_zero(upd)); - else - savings -= (int)(av1_cost_zero(upd)); - update[u]++; - } - } - } - } - } - - /* Is coef updated at all */ - if (update[1] == 0 || savings < 0) { - aom_write_bit(bc, 0); - return; - } - aom_write_bit(bc, 1); - for (i = 0; i < PLANE_TYPES; ++i) { - for (j = 0; j < REF_TYPES; ++j) { - for 
(k = 0; k < COEF_BANDS; ++k) { - for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) { - for (t = 0; t < ENTROPY_NODES; ++t) { - for (idx = 0; idx <= max_idx; ++idx) { - memcpy(this_branch_ct[t][idx], - branch_ct[idx][tx_size][i][j][k][l][t], - 2 * sizeof(this_branch_ct[t][idx][0])); - } - } - for (t = 0; t < entropy_nodes_update; ++t) { - aom_prob newp = new_coef_probs[i][j][k][l][t]; - aom_prob *oldp = old_coef_probs[i][j][k][l] + t; - int s; - int u = 0; - - if (t == PIVOT_NODE) - s = av1_prob_update_search_model_subframe( - this_branch_ct, old_coef_probs[i][j][k][l], &newp, upd, - stepsize, max_idx); - else - s = av1_prob_update_search_subframe(this_branch_ct[t], *oldp, - &newp, upd, max_idx); - if (s > 0 && newp != *oldp) u = 1; - aom_write(bc, u, upd); - if (u) { - /* send/use new probability */ - av1_write_prob_diff_update(bc, newp, *oldp); - *oldp = newp; - } - } - } - } - } - } - return; - } - - case ONE_LOOP_REDUCED: { - int updates = 0; - int noupdates_before_first = 0; - for (i = 0; i < PLANE_TYPES; ++i) { - for (j = 0; j < REF_TYPES; ++j) { - for (k = 0; k < COEF_BANDS; ++k) { - for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) { - for (t = 0; t < ENTROPY_NODES; ++t) { - for (idx = 0; idx <= max_idx; ++idx) { - memcpy(this_branch_ct[t][idx], - branch_ct[idx][tx_size][i][j][k][l][t], - 2 * sizeof(this_branch_ct[t][idx][0])); - } - } - for (t = 0; t < entropy_nodes_update; ++t) { - aom_prob newp = new_coef_probs[i][j][k][l][t]; - aom_prob *oldp = old_coef_probs[i][j][k][l] + t; - int s; - int u = 0; - if (t == PIVOT_NODE) - s = av1_prob_update_search_model_subframe( - this_branch_ct, old_coef_probs[i][j][k][l], &newp, upd, - stepsize, max_idx); - else - s = av1_prob_update_search_subframe(this_branch_ct[t], *oldp, - &newp, upd, max_idx); - if (s > 0 && newp != *oldp) u = 1; - updates += u; - if (u == 0 && updates == 0) { - noupdates_before_first++; - continue; - } - if (u == 1 && updates == 1) { - int v; - // first update - aom_write_bit(bc, 1); - for (v = 0; v < noupdates_before_first; ++v) - aom_write(bc, 0, upd); - } - aom_write(bc, u, upd); - if (u) { - /* send/use new probability */ - av1_write_prob_diff_update(bc, newp, *oldp); - *oldp = newp; - } - } - } - } - } - } - if (updates == 0) { - aom_write_bit(bc, 0); // no updates - } - return; - } - default: assert(0); - } -} -#endif // CONFIG_SUBFRAME_PROB_UPDATE - -#if !(CONFIG_EC_ADAPT && CONFIG_NEW_TOKENSET) +#if !CONFIG_EC_ADAPT static void update_coef_probs(AV1_COMP *cpi, aom_writer *w) { const TX_MODE tx_mode = cpi->common.tx_mode; const TX_SIZE max_tx_size = tx_mode_to_biggest_tx_size[tx_mode]; TX_SIZE tx_size; -#if CONFIG_SUBFRAME_PROB_UPDATE - AV1_COMMON *cm = &cpi->common; - SUBFRAME_STATS *subframe_stats = &cpi->subframe_stats; - int i; - av1_coeff_probs_model dummy_frame_coef_probs[PLANE_TYPES]; - - if (cm->do_subframe_update && - cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) { - av1_copy(cpi->common.fc->coef_probs, - subframe_stats->enc_starting_coef_probs); - for (i = 0; i <= cpi->common.coef_probs_update_idx; ++i) { - get_coef_counts_diff(cpi, i, cpi->wholeframe_stats.coef_counts_buf[i], - cpi->wholeframe_stats.eob_counts_buf[i]); - } - } -#endif // CONFIG_SUBFRAME_PROB_UPDATE for (tx_size = 0; tx_size <= max_tx_size; ++tx_size) { av1_coeff_stats frame_branch_ct[PLANE_TYPES]; @@ -3486,63 +3198,13 @@ static void update_coef_probs(AV1_COMP *cpi, aom_writer *w) { (tx_size >= TX_16X16 && cpi->sf.tx_size_search_method == USE_TX_8X8)) { aom_write_bit(w, 0); } else { -#if CONFIG_SUBFRAME_PROB_UPDATE - if 
(cm->do_subframe_update && - cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) { - unsigned int this_eob_counts_copy[PLANE_TYPES][REF_TYPES][COEF_BANDS] - [COEFF_CONTEXTS]; - av1_coeff_count coef_counts_copy[PLANE_TYPES]; - av1_copy(this_eob_counts_copy, cpi->common.counts.eob_branch[tx_size]); - av1_copy(coef_counts_copy, cpi->td.rd_counts.coef_counts[tx_size]); - build_tree_distribution(cpi, tx_size, frame_branch_ct, - frame_coef_probs); - for (i = 0; i <= cpi->common.coef_probs_update_idx; ++i) { - av1_copy(cpi->common.counts.eob_branch[tx_size], - cpi->wholeframe_stats.eob_counts_buf[i][tx_size]); - av1_copy(cpi->td.rd_counts.coef_counts[tx_size], - cpi->wholeframe_stats.coef_counts_buf[i][tx_size]); - build_tree_distribution(cpi, tx_size, cpi->branch_ct_buf[i][tx_size], - dummy_frame_coef_probs); - } - av1_copy(cpi->common.counts.eob_branch[tx_size], this_eob_counts_copy); - av1_copy(cpi->td.rd_counts.coef_counts[tx_size], coef_counts_copy); - - update_coef_probs_subframe(w, cpi, tx_size, cpi->branch_ct_buf, - frame_coef_probs); - } else { -#endif // CONFIG_SUBFRAME_PROB_UPDATE - build_tree_distribution(cpi, tx_size, frame_branch_ct, - frame_coef_probs); - update_coef_probs_common(w, cpi, tx_size, frame_branch_ct, - frame_coef_probs); -#if CONFIG_SUBFRAME_PROB_UPDATE - } -#endif // CONFIG_SUBFRAME_PROB_UPDATE - } - } - -#if CONFIG_SUBFRAME_PROB_UPDATE - av1_copy(cm->starting_coef_probs, cm->fc->coef_probs); - av1_copy(subframe_stats->coef_probs_buf[0], cm->fc->coef_probs); - if (cm->do_subframe_update && - cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) { - unsigned int eob_counts_copy[TX_SIZES][PLANE_TYPES][REF_TYPES][COEF_BANDS] - [COEFF_CONTEXTS]; - av1_copy(eob_counts_copy, cm->counts.eob_branch); - for (i = 1; i <= cpi->common.coef_probs_update_idx; ++i) { - for (tx_size = 0; tx_size <= max_tx_size; ++tx_size) - av1_full_to_model_counts(cm->counts.coef[tx_size], - subframe_stats->coef_counts_buf[i][tx_size]); - av1_copy(cm->counts.eob_branch, subframe_stats->eob_counts_buf[i]); - av1_partial_adapt_probs(cm, 0, 0); - av1_copy(subframe_stats->coef_probs_buf[i], cm->fc->coef_probs); + build_tree_distribution(cpi, tx_size, frame_branch_ct, frame_coef_probs); + update_coef_probs_common(w, cpi, tx_size, frame_branch_ct, + frame_coef_probs); } - av1_copy(cm->fc->coef_probs, subframe_stats->coef_probs_buf[0]); - av1_copy(cm->counts.eob_branch, eob_counts_copy); } -#endif // CONFIG_SUBFRAME_PROB_UPDATE } -#endif // !(CONFIG_EC_ADAPT && CONFIG_NEW_TOKENSET) +#endif // !CONFIG_EC_ADAPT #endif // !CONFIG_EC_ADAPT #endif // !CONFIG_LV_MAP @@ -3574,7 +3236,14 @@ static void encode_restoration_mode(AV1_COMMON *cm, rsi = &cm->rst_info[p]; switch (rsi->frame_restoration_type) { case RESTORE_NONE: aom_wb_write_bit(wb, 0); break; - case RESTORE_WIENER: aom_wb_write_bit(wb, 1); break; + case RESTORE_WIENER: + aom_wb_write_bit(wb, 1); + aom_wb_write_bit(wb, 0); + break; + case RESTORE_SGRPROJ: + aom_wb_write_bit(wb, 1); + aom_wb_write_bit(wb, 1); + break; default: assert(0); } } @@ -3687,6 +3356,7 @@ static void encode_restoration(AV1_COMMON *cm, aom_writer *wb) { } for (p = 1; p < MAX_MB_PLANE; ++p) { set_default_wiener(&ref_wiener_info); + set_default_sgrproj(&ref_sgrproj_info); rsi = &cm->rst_info[p]; if (rsi->frame_restoration_type == RESTORE_WIENER) { for (i = 0; i < ntiles_uv; ++i) { @@ -3697,6 +3367,15 @@ static void encode_restoration(AV1_COMMON *cm, aom_writer *wb) { write_wiener_filter(&rsi->wiener_info[i], &ref_wiener_info, wb); } } + } else if 
(rsi->frame_restoration_type == RESTORE_SGRPROJ) { + for (i = 0; i < ntiles_uv; ++i) { + if (ntiles_uv > 1) + aom_write(wb, rsi->restoration_type[i] != RESTORE_NONE, + RESTORE_NONE_SGRPROJ_PROB); + if (rsi->restoration_type[i] != RESTORE_NONE) { + write_sgrproj_filter(&rsi->sgrproj_info[i], &ref_sgrproj_info, wb); + } + } } else if (rsi->frame_restoration_type != RESTORE_NONE) { assert(0); } @@ -3972,6 +3651,9 @@ static void write_tile_info(const AV1_COMMON *const cm, aom_wb_write_literal(wb, tile_width - 1, 6); aom_wb_write_literal(wb, tile_height - 1, 6); } +#if CONFIG_DEPENDENT_HORZTILES + if (tile_height > 1) aom_wb_write_bit(wb, cm->dependent_horz_tiles); +#endif #else int min_log2_tile_cols, max_log2_tile_cols, ones; av1_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols); @@ -3985,11 +3667,10 @@ static void write_tile_info(const AV1_COMMON *const cm, // rows aom_wb_write_bit(wb, cm->log2_tile_rows != 0); if (cm->log2_tile_rows != 0) aom_wb_write_bit(wb, cm->log2_tile_rows != 1); -#endif // CONFIG_EXT_TILE - #if CONFIG_DEPENDENT_HORZTILES if (cm->log2_tile_rows != 0) aom_wb_write_bit(wb, cm->dependent_horz_tiles); #endif +#endif // CONFIG_EXT_TILE #if CONFIG_LOOPFILTERING_ACROSS_TILES aom_wb_write_bit(wb, cm->loop_filter_across_tiles_enabled); @@ -4442,9 +4123,6 @@ static void write_render_size(const AV1_COMMON *cm, #if CONFIG_FRAME_SUPERRES static void write_superres_scale(const AV1_COMMON *const cm, struct aom_write_bit_buffer *wb) { - // This scaling and frame superres are probably incompatible - assert(cm->width == cm->render_width && cm->height == cm->render_height); - // First bit is whether to to scale or not if (cm->superres_scale_numerator == SUPERRES_SCALE_DENOMINATOR) { aom_wb_write_bit(wb, 0); // no scaling @@ -4460,23 +4138,9 @@ static void write_superres_scale(const AV1_COMMON *const cm, static void write_frame_size(const AV1_COMMON *cm, struct aom_write_bit_buffer *wb) { -#if CONFIG_FRAME_SUPERRES - // If SUPERRES scaling is happening, write the full resolution instead of the - // downscaled resolution. The decoder will reduce this resolution itself. - if (cm->superres_scale_numerator != SUPERRES_SCALE_DENOMINATOR) { - aom_wb_write_literal(wb, cm->superres_width - 1, 16); - aom_wb_write_literal(wb, cm->superres_height - 1, 16); - } else { -#endif // CONFIG_FRAME_SUPERRES - aom_wb_write_literal(wb, cm->width - 1, 16); - aom_wb_write_literal(wb, cm->height - 1, 16); -#if CONFIG_FRAME_SUPERRES - } -#endif // CONFIG_FRAME_SUPERRES + aom_wb_write_literal(wb, cm->width - 1, 16); + aom_wb_write_literal(wb, cm->height - 1, 16); - // TODO(afergs): Also write something different to render_size? - // When superres scales, they'll be almost guaranteed to be - // different on the other side. 
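With the superres special case removed, the coded size, the render size and the superres scale become three independent signals written back to back. The sketch below only restates the write order using calls visible in these hunks; the comment on the scaled branch of write_superres_scale is an assumption, since that branch falls outside the hunk shown.

  // Coded frame size is written directly; no superres-width substitution.
  aom_wb_write_literal(wb, cm->width - 1, 16);
  aom_wb_write_literal(wb, cm->height - 1, 16);
  // Render size may now legitimately differ from the coded size.
  write_render_size(cm, wb);
#if CONFIG_FRAME_SUPERRES
  // One bit signals "no scaling"; the scaled case (assumed, outside this
  // hunk) conveys the scale so the decoder can upsample after decoding.
  write_superres_scale(cm, wb);
#endif
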
write_render_size(cm, wb); #if CONFIG_FRAME_SUPERRES write_superres_scale(cm, wb); @@ -4559,6 +4223,28 @@ void write_sequence_header(SequenceHeader *seq_params) { } #endif +#if CONFIG_EXT_INTER +static void write_compound_tools(const AV1_COMMON *cm, + struct aom_write_bit_buffer *wb) { + (void)cm; + (void)wb; +#if CONFIG_INTERINTRA + if (!frame_is_intra_only(cm) && cm->reference_mode != COMPOUND_REFERENCE) { + aom_wb_write_bit(wb, cm->allow_interintra_compound); + } else { + assert(cm->allow_interintra_compound == 0); + } +#endif // CONFIG_INTERINTRA +#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT + if (!frame_is_intra_only(cm) && cm->reference_mode != SINGLE_REFERENCE) { + aom_wb_write_bit(wb, cm->allow_masked_compound); + } else { + assert(cm->allow_masked_compound == 0); + } +#endif // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT +} +#endif // CONFIG_EXT_INTER + static void write_uncompressed_header(AV1_COMP *cpi, struct aom_write_bit_buffer *wb) { AV1_COMMON *const cm = &cpi->common; @@ -4637,14 +4323,14 @@ static void write_uncompressed_header(AV1_COMP *cpi, assert(cpi->common.ans_window_size_log2 < 24); aom_wb_write_literal(wb, cpi->common.ans_window_size_log2 - 8, 4); #endif // CONFIG_ANS && ANS_MAX_SYMBOLS -#if CONFIG_PALETTE +#if CONFIG_PALETTE || CONFIG_INTRABC aom_wb_write_bit(wb, cm->allow_screen_content_tools); -#endif // CONFIG_PALETTE +#endif // CONFIG_PALETTE || CONFIG_INTRABC } else { if (!cm->show_frame) aom_wb_write_bit(wb, cm->intra_only); -#if CONFIG_PALETTE +#if CONFIG_PALETTE || CONFIG_INTRABC if (cm->intra_only) aom_wb_write_bit(wb, cm->allow_screen_content_tools); -#endif // CONFIG_PALETTE +#endif // CONFIG_PALETTE || CONFIG_INTRABC if (!cm->error_resilient_mode) { if (cm->intra_only) { aom_wb_write_bit(wb, @@ -4813,6 +4499,9 @@ static void write_uncompressed_header(AV1_COMP *cpi, if (!use_hybrid_pred) aom_wb_write_bit(wb, use_compound_pred); #endif // !CONFIG_REF_ADAPT } +#if CONFIG_EXT_INTER + write_compound_tools(cm, wb); +#endif // CONFIG_EXT_INTER #if CONFIG_EXT_TX aom_wb_write_bit(wb, cm->reduced_tx_set_used); @@ -4896,14 +4585,6 @@ static void write_global_motion(AV1_COMP *cpi, aom_writer *w) { AV1_COMMON *const cm = &cpi->common; int frame; for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) { -#if !CONFIG_REF_MV - // With ref-mv, clearing unused global motion models here is - // unsafe, and we need to rely on the recode loop to do it - // instead. See av1_find_mv_refs for details. 
- if (!cpi->td.rd_counts.global_motion_used[frame]) { - set_default_warp_params(&cm->global_motion[frame]); - } -#endif write_global_motion_params( &cm->global_motion[frame], &cm->prev_frame->global_motion[frame], cm->fc->global_motion_types_prob, w, cm->allow_high_precision_mv); @@ -4950,13 +4631,18 @@ static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) { #if !CONFIG_EC_ADAPT update_txfm_probs(cm, header_bc, counts); #endif +#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT + if (cm->tx_mode == TX_MODE_SELECT) + av1_cond_prob_diff_update(header_bc, &cm->fc->quarter_tx_size_prob, + cm->counts.quarter_tx_size, probwt); +#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT #if CONFIG_LV_MAP av1_write_txb_probs(cpi, header_bc); #else #if !CONFIG_PVQ -#if !(CONFIG_EC_ADAPT && CONFIG_NEW_TOKENSET) +#if !CONFIG_EC_ADAPT update_coef_probs(cpi, header_bc); -#endif // !(CONFIG_EC_ADAPT && CONFIG_NEW_TOKENSET) +#endif // !CONFIG_EC_ADAPT #endif // CONFIG_PVQ #endif // CONFIG_LV_MAP @@ -5023,9 +4709,7 @@ static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) { if (frame_is_intra_only(cm)) { av1_copy(cm->kf_y_prob, av1_kf_y_mode_prob); -#if CONFIG_EC_MULTISYMBOL av1_copy(cm->fc->kf_y_cdf, av1_kf_y_mode_cdf); -#endif #if !CONFIG_EC_ADAPT for (i = 0; i < INTRA_MODES; ++i) @@ -5034,21 +4718,19 @@ static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) { counts->kf_y_mode[i][j], INTRA_MODES, probwt, header_bc); #endif // CONFIG_EC_ADAPT - } else { -#if CONFIG_REF_MV - update_inter_mode_probs(cm, header_bc, counts); -#else -#if !CONFIG_EC_ADAPT - for (i = 0; i < INTER_MODE_CONTEXTS; ++i) { - prob_diff_update(av1_inter_mode_tree, cm->fc->inter_mode_probs[i], - counts->inter_mode[i], INTER_MODES, probwt, header_bc); +#if CONFIG_INTRABC + if (cm->allow_screen_content_tools) { + av1_cond_prob_diff_update(header_bc, &fc->intrabc_prob, + cm->counts.intrabc, probwt); } #endif -#endif + } else { + update_inter_mode_probs(cm, header_bc, counts); #if CONFIG_EXT_INTER update_inter_compound_mode_probs(cm, probwt, header_bc); - - if (cm->reference_mode != COMPOUND_REFERENCE) { +#if CONFIG_INTERINTRA + if (cm->reference_mode != COMPOUND_REFERENCE && + cm->allow_interintra_compound) { for (i = 0; i < BLOCK_SIZE_GROUPS; i++) { if (is_interintra_allowed_bsize_group(i)) { av1_cond_prob_diff_update(header_bc, &fc->interintra_prob[i], @@ -5060,14 +4742,17 @@ static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) { av1_interintra_mode_tree, cm->fc->interintra_mode_prob[i], counts->interintra_mode[i], INTERINTRA_MODES, probwt, header_bc); } +#if CONFIG_WEDGE for (i = 0; i < BLOCK_SIZES; i++) { if (is_interintra_allowed_bsize(i) && is_interintra_wedge_used(i)) av1_cond_prob_diff_update(header_bc, &fc->wedge_interintra_prob[i], cm->counts.wedge_interintra[i], probwt); } +#endif // CONFIG_WEDGE } +#endif // CONFIG_INTERINTRA #if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE - if (cm->reference_mode != SINGLE_REFERENCE) { + if (cm->reference_mode != SINGLE_REFERENCE && cm->allow_masked_compound) { for (i = 0; i < BLOCK_SIZES; i++) prob_diff_update(av1_compound_type_tree, fc->compound_type_prob[i], cm->counts.compound_interinter[i], COMPOUND_TYPES, @@ -5133,12 +4818,7 @@ static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) { } #endif - av1_write_nmv_probs(cm, cm->allow_high_precision_mv, header_bc, -#if CONFIG_REF_MV - counts->mv); -#else - &counts->mv); -#endif + av1_write_nmv_probs(cm, cm->allow_high_precision_mv, header_bc, counts->mv); 
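Dropping the CONFIG_REF_MV branch makes counts->mv an array of per-context MV statistics, matching the NMV_CONTEXTS loops used further down for the CDF setup. A minimal sketch of the per-context pattern this enables follows; write_one_nmv_context is a hypothetical helper used only for illustration, the real work happens inside av1_write_nmv_probs.

  // counts->mv now holds one set of MV statistics per context.
  for (int i = 0; i < NMV_CONTEXTS; ++i) {
    // Each context carries its own joint/component models (fc->nmvc[i]).
    write_one_nmv_context(header_bc, &cm->fc->nmvc[i], &counts->mv[i],
                          cm->allow_high_precision_mv);  // hypothetical helper
  }
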
#if !CONFIG_EC_ADAPT update_ext_tx_probs(cm, header_bc); #endif @@ -5149,22 +4829,12 @@ static uint32_t write_compressed_header(AV1_COMP *cpi, uint8_t *data) { write_global_motion(cpi, header_bc); #endif // CONFIG_GLOBAL_MOTION } -#if CONFIG_EC_MULTISYMBOL #if !CONFIG_EC_ADAPT -#if CONFIG_NEW_TOKENSET av1_coef_head_cdfs(fc); -#endif av1_coef_pareto_cdfs(fc); -#if CONFIG_REF_MV for (i = 0; i < NMV_CONTEXTS; ++i) av1_set_mv_cdfs(&fc->nmvc[i]); -#else - av1_set_mv_cdfs(&fc->nmvc); -#endif -#if CONFIG_EC_MULTISYMBOL av1_set_mode_cdfs(cm); -#endif #endif // !CONFIG_EC_ADAPT -#endif #if CONFIG_ANS aom_buf_ans_flush(header_bc); header_size = buf_ans_write_end(header_bc); diff --git a/third_party/aom/av1/encoder/block.h b/third_party/aom/av1/encoder/block.h index 39e08d5b4..e16479e64 100644 --- a/third_party/aom/av1/encoder/block.h +++ b/third_party/aom/av1/encoder/block.h @@ -17,9 +17,7 @@ #if CONFIG_PVQ #include "av1/encoder/encint.h" #endif -#if CONFIG_REF_MV #include "av1/common/mvref_common.h" -#endif #ifdef __cplusplus extern "C" { @@ -79,13 +77,11 @@ typedef struct { int dc_sign_ctx[MAX_MB_PLANE] [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)]; #endif -#if CONFIG_REF_MV uint8_t ref_mv_count[MODE_CTX_REF_FRAMES]; CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE]; #if CONFIG_EXT_INTER int16_t compound_mode_context[MODE_CTX_REF_FRAMES]; #endif // CONFIG_EXT_INTER -#endif } MB_MODE_INFO_EXT; typedef struct { @@ -141,27 +137,18 @@ struct macroblock { unsigned int pred_sse[TOTAL_REFS_PER_FRAME]; int pred_mv_sad[TOTAL_REFS_PER_FRAME]; -#if CONFIG_REF_MV int *nmvjointcost; int nmv_vec_cost[NMV_CONTEXTS][MV_JOINTS]; int *nmvcost[NMV_CONTEXTS][2]; int *nmvcost_hp[NMV_CONTEXTS][2]; int **mv_cost_stack[NMV_CONTEXTS]; - int *nmvjointsadcost; -#else - int nmvjointcost[MV_JOINTS]; - int *nmvcost[2]; - int *nmvcost_hp[2]; - int nmvjointsadcost[MV_JOINTS]; -#endif - int **mvcost; - int *nmvsadcost[2]; - int *nmvsadcost_hp[2]; - int **mvsadcost; + #if CONFIG_MOTION_VAR int32_t *wsrc_buf; int32_t *mask_buf; + uint8_t *above_pred_buf; + uint8_t *left_pred_buf; #endif // CONFIG_MOTION_VAR #if CONFIG_PALETTE @@ -174,10 +161,8 @@ struct macroblock { #if CONFIG_VAR_TX uint8_t blk_skip[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE * 8]; -#if CONFIG_REF_MV uint8_t blk_skip_drl[MAX_MB_PLANE][MAX_MIB_SIZE * MAX_MIB_SIZE * 8]; #endif -#endif int skip; @@ -226,8 +211,11 @@ struct macroblock { // This is needed when using the 8x8 Daala distortion metric during RDO, // because it evaluates distortion in a different order than the underlying // 4x4 blocks are coded. - int rate_4x4[256]; -#endif + int rate_4x4[MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)]; +#if CONFIG_CB4X4 + DECLARE_ALIGNED(16, uint8_t, decoded_8x8[8 * 8]); +#endif // CONFIG_CB4X4 +#endif // CONFIG_DAALA_DIST #if CONFIG_CFL // Whether luma needs to be stored during RDO. 
int cfl_store_y; diff --git a/third_party/aom/av1/encoder/context_tree.h b/third_party/aom/av1/encoder/context_tree.h index 67954126c..4f9d5e374 100644 --- a/third_party/aom/av1/encoder/context_tree.h +++ b/third_party/aom/av1/encoder/context_tree.h @@ -34,7 +34,6 @@ typedef struct { uint8_t *blk_skip[MAX_MB_PLANE]; #endif - // dual buffer pointers, 0: in use, 1: best in store tran_low_t *coeff[MAX_MB_PLANE]; tran_low_t *qcoeff[MAX_MB_PLANE]; tran_low_t *dqcoeff[MAX_MB_PLANE]; @@ -48,9 +47,8 @@ typedef struct { int num_4x4_blk; int skip; - int pred_pixel_ready; // For current partition, only if all Y, U, and V transform blocks' - // coefficients are quantized to 0, skippable is set to 0. + // coefficients are quantized to 0, skippable is set to 1. int skippable; int best_mode_index; int hybrid_pred_diff; diff --git a/third_party/aom/av1/encoder/corner_match.c b/third_party/aom/av1/encoder/corner_match.c index 64ee0c5ae..3827b65fa 100644 --- a/third_party/aom/av1/encoder/corner_match.c +++ b/third_party/aom/av1/encoder/corner_match.c @@ -9,16 +9,13 @@ * PATENTS file, you can obtain it at www.aomedia.org/license/patent. */ -#include <stdio.h> #include <stdlib.h> #include <memory.h> #include <math.h> +#include "./av1_rtcd.h" #include "av1/encoder/corner_match.h" -#define MATCH_SZ 13 -#define MATCH_SZ_BY2 ((MATCH_SZ - 1) / 2) -#define MATCH_SZ_SQ (MATCH_SZ * MATCH_SZ) #define SEARCH_SZ 9 #define SEARCH_SZ_BY2 ((SEARCH_SZ - 1) / 2) @@ -28,8 +25,8 @@ centered at (x, y). */ static double compute_variance(unsigned char *im, int stride, int x, int y) { - int sum = 0.0; - int sumsq = 0.0; + int sum = 0; + int sumsq = 0; int var; int i, j; for (i = 0; i < MATCH_SZ; ++i) @@ -46,9 +43,9 @@ static double compute_variance(unsigned char *im, int stride, int x, int y) { correlation/standard deviation are taken over MATCH_SZ by MATCH_SZ windows of each image, centered at (x1, y1) and (x2, y2) respectively. */ -static double compute_cross_correlation(unsigned char *im1, int stride1, int x1, - int y1, unsigned char *im2, int stride2, - int x2, int y2) { +double compute_cross_correlation_c(unsigned char *im1, int stride1, int x1, + int y1, unsigned char *im2, int stride2, + int x2, int y2) { int v1, v2; int sum1 = 0; int sum2 = 0; diff --git a/third_party/aom/av1/encoder/corner_match.h b/third_party/aom/av1/encoder/corner_match.h index c0458642c..3b16f9efc 100644 --- a/third_party/aom/av1/encoder/corner_match.h +++ b/third_party/aom/av1/encoder/corner_match.h @@ -15,6 +15,10 @@ #include <stdlib.h> #include <memory.h> +#define MATCH_SZ 13 +#define MATCH_SZ_BY2 ((MATCH_SZ - 1) / 2) +#define MATCH_SZ_SQ (MATCH_SZ * MATCH_SZ) + typedef struct { int x, y; int rx, ry; diff --git a/third_party/aom/av1/encoder/daala_compat_enc.c b/third_party/aom/av1/encoder/daala_compat_enc.c index 3df424cac..c60e2d3d7 100644 --- a/third_party/aom/av1/encoder/daala_compat_enc.c +++ b/third_party/aom/av1/encoder/daala_compat_enc.c @@ -12,19 +12,19 @@ #include "encint.h" void od_encode_checkpoint(const daala_enc_ctx *enc, od_rollback_buffer *rbuf) { -#if CONFIG_DAALA_EC +#if !CONFIG_ANS od_ec_enc_checkpoint(&rbuf->ec, &enc->w.ec); #else -#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC." +#error "CONFIG_PVQ currently requires !CONFIG_ANS." #endif OD_COPY(&rbuf->adapt, enc->state.adapt, 1); } void od_encode_rollback(daala_enc_ctx *enc, const od_rollback_buffer *rbuf) { -#if CONFIG_DAALA_EC +#if !CONFIG_ANS od_ec_enc_rollback(&enc->w.ec, &rbuf->ec); #else -#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC." 
+#error "CONFIG_PVQ currently requires !CONFIG_ANS." #endif OD_COPY(enc->state.adapt, &rbuf->adapt, 1); } diff --git a/third_party/aom/av1/encoder/dct.c b/third_party/aom/av1/encoder/dct.c index 09e1b0563..f6b64f0f7 100644 --- a/third_party/aom/av1/encoder/dct.c +++ b/third_party/aom/av1/encoder/dct.c @@ -19,7 +19,7 @@ #include "aom_ports/mem.h" #include "av1/common/blockd.h" #include "av1/common/av1_fwd_txfm1d.h" -#include "av1/common/av1_fwd_txfm2d_cfg.h" +#include "av1/common/av1_fwd_txfm1d_cfg.h" #include "av1/common/idct.h" static INLINE void range_check(const tran_low_t *input, const int size, @@ -1022,6 +1022,10 @@ static void fhalfright32(const tran_low_t *input, tran_low_t *output) { } #if CONFIG_EXT_TX +// TODO(sarahparker) these functions will be removed once the highbitdepth +// codepath works properly for rectangular transforms. They have almost +// identical versions in av1_fwd_txfm1d.c, but those are currently only +// being used for square transforms. static void fidtx4(const tran_low_t *input, tran_low_t *output) { int i; for (i = 0; i < 4; ++i) @@ -2133,8 +2137,7 @@ static void fdct64_col(const tran_low_t *input, tran_low_t *output) { int32_t in[64], out[64]; int i; for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i]; - av1_fdct64_new(in, out, fwd_cos_bit_col_dct_dct_64, - fwd_stage_range_col_dct_dct_64); + av1_fdct64_new(in, out, fwd_cos_bit_col_dct_64, fwd_stage_range_col_dct_64); for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i]; } @@ -2142,8 +2145,7 @@ static void fdct64_row(const tran_low_t *input, tran_low_t *output) { int32_t in[64], out[64]; int i; for (i = 0; i < 64; ++i) in[i] = (int32_t)input[i]; - av1_fdct64_new(in, out, fwd_cos_bit_row_dct_dct_64, - fwd_stage_range_row_dct_dct_64); + av1_fdct64_new(in, out, fwd_cos_bit_row_dct_64, fwd_stage_range_row_dct_64); for (i = 0; i < 64; ++i) output[i] = (tran_low_t)out[i]; } @@ -2225,4 +2227,49 @@ void av1_highbd_fht64x64_c(const int16_t *input, tran_low_t *output, int stride, } #endif // CONFIG_TX64X64 #endif // CONFIG_HIGHBITDEPTH + +#if CONFIG_DPCM_INTRA +void av1_dpcm_ft4_c(const int16_t *input, int stride, TX_TYPE_1D tx_type, + tran_low_t *output) { + assert(tx_type < TX_TYPES_1D); + static const transform_1d FHT[] = { fdct4, fadst4, fadst4, fidtx4 }; + const transform_1d ft = FHT[tx_type]; + tran_low_t temp_in[4]; + for (int i = 0; i < 4; ++i) + temp_in[i] = (tran_low_t)fdct_round_shift(input[i * stride] * 4 * Sqrt2); + ft(temp_in, output); +} + +void av1_dpcm_ft8_c(const int16_t *input, int stride, TX_TYPE_1D tx_type, + tran_low_t *output) { + assert(tx_type < TX_TYPES_1D); + static const transform_1d FHT[] = { fdct8, fadst8, fadst8, fidtx8 }; + const transform_1d ft = FHT[tx_type]; + tran_low_t temp_in[8]; + for (int i = 0; i < 8; ++i) temp_in[i] = input[i * stride] * 4; + ft(temp_in, output); +} + +void av1_dpcm_ft16_c(const int16_t *input, int stride, TX_TYPE_1D tx_type, + tran_low_t *output) { + assert(tx_type < TX_TYPES_1D); + static const transform_1d FHT[] = { fdct16, fadst16, fadst16, fidtx16 }; + const transform_1d ft = FHT[tx_type]; + tran_low_t temp_in[16]; + for (int i = 0; i < 16; ++i) + temp_in[i] = (tran_low_t)fdct_round_shift(input[i * stride] * 2 * Sqrt2); + ft(temp_in, output); +} + +void av1_dpcm_ft32_c(const int16_t *input, int stride, TX_TYPE_1D tx_type, + tran_low_t *output) { + assert(tx_type < TX_TYPES_1D); + static const transform_1d FHT[] = { fdct32, fhalfright32, fhalfright32, + fidtx32 }; + const transform_1d ft = FHT[tx_type]; + tran_low_t temp_in[32]; + for (int i = 0; i < 
32; ++i) temp_in[i] = input[i * stride]; + ft(temp_in, output); +} +#endif // CONFIG_DPCM_INTRA #endif // !AV1_DCT_GTEST diff --git a/third_party/aom/av1/encoder/encodeframe.c b/third_party/aom/av1/encoder/encodeframe.c index d254157e7..36d09c02a 100644 --- a/third_party/aom/av1/encoder/encodeframe.c +++ b/third_party/aom/av1/encoder/encodeframe.c @@ -72,8 +72,7 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run, int mi_row, - int mi_col, BLOCK_SIZE bsize, - PICK_MODE_CONTEXT *ctx, int *rate); + int mi_col, BLOCK_SIZE bsize, int *rate); #if CONFIG_SUPERTX static int check_intra_b(PICK_MODE_CONTEXT *ctx); @@ -273,14 +272,13 @@ static void set_offsets_without_segment_id(const AV1_COMP *const cpi, const int mi_width = mi_size_wide[bsize]; const int mi_height = mi_size_high[bsize]; - set_skip_context(xd, mi_row, mi_col); - set_mode_info_offsets(cpi, x, xd, mi_row, mi_col); - + set_skip_context(xd, mi_row, mi_col); #if CONFIG_VAR_TX - xd->above_txfm_context = cm->above_txfm_context + mi_col; - xd->left_txfm_context = - xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); + xd->above_txfm_context = + cm->above_txfm_context + (mi_col << TX_UNIT_WIDE_LOG2); + xd->left_txfm_context = xd->left_txfm_context_buffer + + ((mi_row & MAX_MIB_MASK) << TX_UNIT_HIGH_LOG2); xd->max_tx_size = max_txsize_lookup[bsize]; #endif @@ -452,563 +450,6 @@ static void set_segment_id_supertx(const AV1_COMP *const cpi, } #endif // CONFIG_SUPERTX -static void set_block_size(AV1_COMP *const cpi, MACROBLOCK *const x, - MACROBLOCKD *const xd, int mi_row, int mi_col, - BLOCK_SIZE bsize) { - if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) { - const int mi_width = AOMMAX(mi_size_wide[bsize], mi_size_wide[BLOCK_8X8]); - const int mi_height = AOMMAX(mi_size_high[bsize], mi_size_high[BLOCK_8X8]); - for (int r = 0; r < mi_height; ++r) { - for (int c = 0; c < mi_width; ++c) { - set_mode_info_offsets(cpi, x, xd, mi_row + r, mi_col + c); - xd->mi[0]->mbmi.sb_type = bsize; - } - } - } -} - -static void set_vt_partitioning(AV1_COMP *cpi, MACROBLOCK *const x, - MACROBLOCKD *const xd, VAR_TREE *vt, int mi_row, - int mi_col, const int64_t *const threshold, - const BLOCK_SIZE *const bsize_min) { - AV1_COMMON *const cm = &cpi->common; - const int hbw = mi_size_wide[vt->bsize] / 2; - const int hbh = mi_size_high[vt->bsize] / 2; - const int has_cols = mi_col + hbw < cm->mi_cols; - const int has_rows = mi_row + hbh < cm->mi_rows; - - if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - - assert(vt->bsize >= BLOCK_8X8); - - assert(hbh == hbw); - - if (vt->bsize == BLOCK_8X8 && cm->frame_type != KEY_FRAME) { - set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_8X8); - return; - } - - if (vt->force_split || (!has_cols && !has_rows)) goto split; - - // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if - // variance is below threshold, otherwise split will be selected. - // No check for vert/horiz split as too few samples for variance. 
- if (vt->bsize == bsize_min[0]) { - if (has_cols && has_rows && vt->variances.none.variance < threshold[0]) { - set_block_size(cpi, x, xd, mi_row, mi_col, vt->bsize); - return; - } else { - BLOCK_SIZE subsize = get_subsize(vt->bsize, PARTITION_SPLIT); - set_block_size(cpi, x, xd, mi_row, mi_col, subsize); - if (vt->bsize > BLOCK_8X8) { - set_block_size(cpi, x, xd, mi_row, mi_col + hbw, subsize); - set_block_size(cpi, x, xd, mi_row + hbh, mi_col, subsize); - set_block_size(cpi, x, xd, mi_row + hbh, mi_col + hbw, subsize); - } - return; - } - } else if (vt->bsize > bsize_min[0]) { - // For key frame: take split for bsize above 32X32 or very high variance. - if (cm->frame_type == KEY_FRAME && - (vt->bsize > BLOCK_32X32 || - vt->variances.none.variance > (threshold[0] << 4))) { - goto split; - } - // If variance is low, take the bsize (no split). - if (has_cols && has_rows && vt->variances.none.variance < threshold[0]) { - set_block_size(cpi, x, xd, mi_row, mi_col, vt->bsize); - return; - } - - // Check vertical split. - if (has_rows) { - BLOCK_SIZE subsize = get_subsize(vt->bsize, PARTITION_VERT); - if (vt->variances.vert[0].variance < threshold[0] && - vt->variances.vert[1].variance < threshold[0] && - get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) { - set_block_size(cpi, x, xd, mi_row, mi_col, subsize); - set_block_size(cpi, x, xd, mi_row, mi_col + hbw, subsize); - return; - } - } - // Check horizontal split. - if (has_cols) { - BLOCK_SIZE subsize = get_subsize(vt->bsize, PARTITION_HORZ); - if (vt->variances.horz[0].variance < threshold[0] && - vt->variances.horz[1].variance < threshold[0] && - get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) { - set_block_size(cpi, x, xd, mi_row, mi_col, subsize); - set_block_size(cpi, x, xd, mi_row + hbh, mi_col, subsize); - return; - } - } - } - -split : { - set_vt_partitioning(cpi, x, xd, vt->split[0], mi_row, mi_col, threshold + 1, - bsize_min + 1); - set_vt_partitioning(cpi, x, xd, vt->split[1], mi_row, mi_col + hbw, - threshold + 1, bsize_min + 1); - set_vt_partitioning(cpi, x, xd, vt->split[2], mi_row + hbh, mi_col, - threshold + 1, bsize_min + 1); - set_vt_partitioning(cpi, x, xd, vt->split[3], mi_row + hbh, mi_col + hbw, - threshold + 1, bsize_min + 1); - return; -} -} - -// Set the variance split thresholds for following the block sizes: -// 0 - threshold_64x64, 1 - threshold_32x32, 2 - threshold_16x16, -// 3 - vbp_threshold_8x8. vbp_threshold_8x8 (to split to 4x4 partition) is -// currently only used on key frame. -static void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[], int q) { - AV1_COMMON *const cm = &cpi->common; - const int is_key_frame = (cm->frame_type == KEY_FRAME); - const int threshold_multiplier = is_key_frame ? 
20 : 1; - const int64_t threshold_base = - (int64_t)(threshold_multiplier * cpi->y_dequant[q][1]); - if (is_key_frame) { - thresholds[1] = threshold_base; - thresholds[2] = threshold_base >> 2; - thresholds[3] = threshold_base >> 2; - thresholds[4] = threshold_base << 2; - } else { - thresholds[2] = threshold_base; - if (cm->width <= 352 && cm->height <= 288) { - thresholds[1] = threshold_base >> 2; - thresholds[3] = threshold_base << 3; - } else { - thresholds[1] = threshold_base; - thresholds[2] = (5 * threshold_base) >> 2; - if (cm->width >= 1920 && cm->height >= 1080) - thresholds[2] = (7 * threshold_base) >> 2; - thresholds[3] = threshold_base << cpi->oxcf.speed; - } - } - thresholds[0] = INT64_MIN; -} - -void av1_set_variance_partition_thresholds(AV1_COMP *cpi, int q) { - AV1_COMMON *const cm = &cpi->common; - SPEED_FEATURES *const sf = &cpi->sf; - const int is_key_frame = (cm->frame_type == KEY_FRAME); - if (sf->partition_search_type != VAR_BASED_PARTITION && - sf->partition_search_type != REFERENCE_PARTITION) { - return; - } else { - set_vbp_thresholds(cpi, cpi->vbp_thresholds, q); - // The thresholds below are not changed locally. - if (is_key_frame) { - cpi->vbp_threshold_sad = 0; - cpi->vbp_bsize_min = BLOCK_8X8; - } else { - if (cm->width <= 352 && cm->height <= 288) - cpi->vbp_threshold_sad = 100; - else - cpi->vbp_threshold_sad = (cpi->y_dequant[q][1] << 1) > 1000 - ? (cpi->y_dequant[q][1] << 1) - : 1000; - cpi->vbp_bsize_min = BLOCK_16X16; - } - cpi->vbp_threshold_minmax = 15 + (q >> 3); - } -} - -// Compute the minmax over the 8x8 subblocks. -static int compute_minmax_8x8(const uint8_t *src, int src_stride, - const uint8_t *ref, int ref_stride, -#if CONFIG_HIGHBITDEPTH - int highbd, -#endif - int pixels_wide, int pixels_high) { - int k; - int minmax_max = 0; - int minmax_min = 255; - // Loop over the 4 8x8 subblocks. 
- for (k = 0; k < 4; k++) { - const int x8_idx = ((k & 1) << 3); - const int y8_idx = ((k >> 1) << 3); - int min = 0; - int max = 0; - if (x8_idx < pixels_wide && y8_idx < pixels_high) { - const int src_offset = y8_idx * src_stride + x8_idx; - const int ref_offset = y8_idx * ref_stride + x8_idx; -#if CONFIG_HIGHBITDEPTH - if (highbd) { - aom_highbd_minmax_8x8(src + src_offset, src_stride, ref + ref_offset, - ref_stride, &min, &max); - } else { - aom_minmax_8x8(src + src_offset, src_stride, ref + ref_offset, - ref_stride, &min, &max); - } -#else - aom_minmax_8x8(src + src_offset, src_stride, ref + ref_offset, ref_stride, - &min, &max); -#endif - if ((max - min) > minmax_max) minmax_max = (max - min); - if ((max - min) < minmax_min) minmax_min = (max - min); - } - } - return (minmax_max - minmax_min); -} - -#if CONFIG_HIGHBITDEPTH -static INLINE int avg_4x4(const uint8_t *const src, const int stride, - const int highbd) { - if (highbd) { - return aom_highbd_avg_4x4(src, stride); - } else { - return aom_avg_4x4(src, stride); - } -} -#else -static INLINE int avg_4x4(const uint8_t *const src, const int stride) { - return aom_avg_4x4(src, stride); -} -#endif - -#if CONFIG_HIGHBITDEPTH -static INLINE int avg_8x8(const uint8_t *const src, const int stride, - const int highbd) { - if (highbd) { - return aom_highbd_avg_8x8(src, stride); - } else { - return aom_avg_8x8(src, stride); - } -} -#else -static INLINE int avg_8x8(const uint8_t *const src, const int stride) { - return aom_avg_8x8(src, stride); -} -#endif - -static void init_variance_tree(VAR_TREE *const vt, -#if CONFIG_HIGHBITDEPTH - const int highbd, -#endif - BLOCK_SIZE bsize, BLOCK_SIZE leaf_size, - const int width, const int height, - const uint8_t *const src, const int src_stride, - const uint8_t *const ref, const int ref_stride) { - assert(bsize >= leaf_size); - - vt->bsize = bsize; - - vt->force_split = 0; - - vt->src = src; - vt->src_stride = src_stride; - vt->ref = ref; - vt->ref_stride = ref_stride; - - vt->width = width; - vt->height = height; - -#if CONFIG_HIGHBITDEPTH - vt->highbd = highbd; -#endif // CONFIG_HIGHBITDEPTH - - if (bsize > leaf_size) { - const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT); - const int px = block_size_wide[subsize]; - - init_variance_tree(vt->split[0], -#if CONFIG_HIGHBITDEPTH - highbd, -#endif // CONFIG_HIGHBITDEPTH - subsize, leaf_size, AOMMIN(px, width), - AOMMIN(px, height), src, src_stride, ref, ref_stride); - init_variance_tree(vt->split[1], -#if CONFIG_HIGHBITDEPTH - highbd, -#endif // CONFIG_HIGHBITDEPTH - subsize, leaf_size, width - px, AOMMIN(px, height), - src + px, src_stride, ref + px, ref_stride); - init_variance_tree(vt->split[2], -#if CONFIG_HIGHBITDEPTH - highbd, -#endif // CONFIG_HIGHBITDEPTH - subsize, leaf_size, AOMMIN(px, width), height - px, - src + px * src_stride, src_stride, ref + px * ref_stride, - ref_stride); - init_variance_tree(vt->split[3], -#if CONFIG_HIGHBITDEPTH - highbd, -#endif // CONFIG_HIGHBITDEPTH - subsize, leaf_size, width - px, height - px, - src + px * src_stride + px, src_stride, - ref + px * ref_stride + px, ref_stride); - } -} - -// Fill the variance tree based on averaging pixel values (sub-sampling), at -// the leaf node size. 
-static void fill_variance_tree(VAR_TREE *const vt, const BLOCK_SIZE leaf_size) { - if (vt->bsize > leaf_size) { - fill_variance_tree(vt->split[0], leaf_size); - fill_variance_tree(vt->split[1], leaf_size); - fill_variance_tree(vt->split[2], leaf_size); - fill_variance_tree(vt->split[3], leaf_size); - fill_variance_node(vt); - } else if (vt->width <= 0 || vt->height <= 0) { - fill_variance(0, 0, 0, &vt->variances.none); - } else { - unsigned int sse = 0; - int sum = 0; - int src_avg; - int ref_avg; - assert(leaf_size == BLOCK_4X4 || leaf_size == BLOCK_8X8); - if (leaf_size == BLOCK_4X4) { - src_avg = avg_4x4(vt->src, vt->src_stride IF_HBD(, vt->highbd)); - ref_avg = avg_4x4(vt->ref, vt->ref_stride IF_HBD(, vt->highbd)); - } else { - src_avg = avg_8x8(vt->src, vt->src_stride IF_HBD(, vt->highbd)); - ref_avg = avg_8x8(vt->ref, vt->ref_stride IF_HBD(, vt->highbd)); - } - sum = src_avg - ref_avg; - sse = sum * sum; - fill_variance(sse, sum, 0, &vt->variances.none); - } -} - -static void refine_variance_tree(VAR_TREE *const vt, const int64_t threshold) { - if (vt->bsize >= BLOCK_8X8) { - if (vt->bsize == BLOCK_16X16) { - if (vt->variances.none.variance <= threshold) - return; - else - vt->force_split = 0; - } - - refine_variance_tree(vt->split[0], threshold); - refine_variance_tree(vt->split[1], threshold); - refine_variance_tree(vt->split[2], threshold); - refine_variance_tree(vt->split[3], threshold); - - if (vt->bsize <= BLOCK_16X16) fill_variance_node(vt); - } else if (vt->width <= 0 || vt->height <= 0) { - fill_variance(0, 0, 0, &vt->variances.none); - } else { - const int src_avg = avg_4x4(vt->src, vt->src_stride IF_HBD(, vt->highbd)); - const int ref_avg = avg_4x4(vt->ref, vt->ref_stride IF_HBD(, vt->highbd)); - const int sum = src_avg - ref_avg; - const unsigned int sse = sum * sum; - assert(vt->bsize == BLOCK_4X4); - fill_variance(sse, sum, 0, &vt->variances.none); - } -} - -static int check_split_key_frame(VAR_TREE *const vt, const int64_t threshold) { - if (vt->bsize == BLOCK_32X32) { - vt->force_split = vt->variances.none.variance > threshold; - } else { - vt->force_split |= check_split_key_frame(vt->split[0], threshold); - vt->force_split |= check_split_key_frame(vt->split[1], threshold); - vt->force_split |= check_split_key_frame(vt->split[2], threshold); - vt->force_split |= check_split_key_frame(vt->split[3], threshold); - } - return vt->force_split; -} - -static int check_split(AV1_COMP *const cpi, VAR_TREE *const vt, - const int segment_id, const int64_t *const thresholds) { - if (vt->bsize == BLOCK_16X16) { - vt->force_split = vt->variances.none.variance > thresholds[0]; - if (!vt->force_split && vt->variances.none.variance > thresholds[-1] && - !cyclic_refresh_segment_id_boosted(segment_id)) { - // We have some nominal amount of 16x16 variance (based on average), - // compute the minmax over the 8x8 sub-blocks, and if above threshold, - // force split to 8x8 block for this 16x16 block. 
- int minmax = - compute_minmax_8x8(vt->src, vt->src_stride, vt->ref, vt->ref_stride, -#if CONFIG_HIGHBITDEPTH - vt->highbd, -#endif - vt->width, vt->height); - vt->force_split = minmax > cpi->vbp_threshold_minmax; - } - } else { - vt->force_split |= - check_split(cpi, vt->split[0], segment_id, thresholds + 1); - vt->force_split |= - check_split(cpi, vt->split[1], segment_id, thresholds + 1); - vt->force_split |= - check_split(cpi, vt->split[2], segment_id, thresholds + 1); - vt->force_split |= - check_split(cpi, vt->split[3], segment_id, thresholds + 1); - - if (vt->bsize == BLOCK_32X32 && !vt->force_split) { - vt->force_split = vt->variances.none.variance > thresholds[0]; - } - } - - return vt->force_split; -} - -// This function chooses partitioning based on the variance between source and -// reconstructed last (or golden), where variance is computed for down-sampled -// inputs. -static void choose_partitioning(AV1_COMP *const cpi, ThreadData *const td, - const TileInfo *const tile, MACROBLOCK *const x, - const int mi_row, const int mi_col) { - AV1_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &x->e_mbd; - VAR_TREE *const vt = td->var_root[cm->mib_size_log2 - MIN_MIB_SIZE_LOG2]; -#if CONFIG_DUAL_FILTER - int i; -#endif - const uint8_t *src; - const uint8_t *ref; - int src_stride; - int ref_stride; - int pixels_wide = MI_SIZE * mi_size_wide[cm->sb_size]; - int pixels_high = MI_SIZE * mi_size_high[cm->sb_size]; - int64_t thresholds[5] = { - cpi->vbp_thresholds[0], cpi->vbp_thresholds[1], cpi->vbp_thresholds[2], - cpi->vbp_thresholds[3], cpi->vbp_thresholds[4], - }; - BLOCK_SIZE bsize_min[5] = { BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, - cpi->vbp_bsize_min, BLOCK_8X8 }; - const int start_level = cm->sb_size == BLOCK_64X64 ? 1 : 0; - const int64_t *const thre = thresholds + start_level; - const BLOCK_SIZE *const bmin = bsize_min + start_level; - - const int is_key_frame = (cm->frame_type == KEY_FRAME); - const int low_res = (cm->width <= 352 && cm->height <= 288); - - int segment_id = CR_SEGMENT_ID_BASE; - - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) { - const uint8_t *const map = - cm->seg.update_map ? 
cpi->segmentation_map : cm->last_frame_seg_map; - segment_id = get_segment_id(cm, map, cm->sb_size, mi_row, mi_col); - - if (cyclic_refresh_segment_id_boosted(segment_id)) { - int q = av1_get_qindex(&cm->seg, segment_id, cm->base_qindex); - set_vbp_thresholds(cpi, thresholds, q); - } - } - - set_offsets(cpi, tile, x, mi_row, mi_col, cm->sb_size); - - if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3); - if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3); - - src = x->plane[0].src.buf; - src_stride = x->plane[0].src.stride; - - if (!is_key_frame) { - MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; - const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); - const YV12_BUFFER_CONFIG *yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME); - unsigned int y_sad, y_sad_g; - - const int hbs = cm->mib_size / 2; - const int split_vert = mi_col + hbs >= cm->mi_cols; - const int split_horz = mi_row + hbs >= cm->mi_rows; - BLOCK_SIZE bsize; - - if (split_vert && split_horz) - bsize = get_subsize(cm->sb_size, PARTITION_SPLIT); - else if (split_vert) - bsize = get_subsize(cm->sb_size, PARTITION_VERT); - else if (split_horz) - bsize = get_subsize(cm->sb_size, PARTITION_HORZ); - else - bsize = cm->sb_size; - - assert(yv12 != NULL); - - if (yv12_g && yv12_g != yv12) { - av1_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, - &cm->frame_refs[GOLDEN_FRAME - 1].sf); - y_sad_g = cpi->fn_ptr[bsize].sdf( - x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf, - xd->plane[0].pre[0].stride); - } else { - y_sad_g = UINT_MAX; - } - - av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, - &cm->frame_refs[LAST_FRAME - 1].sf); - mbmi->ref_frame[0] = LAST_FRAME; - mbmi->ref_frame[1] = NONE_FRAME; - mbmi->sb_type = cm->sb_size; - mbmi->mv[0].as_int = 0; -#if CONFIG_DUAL_FILTER - for (i = 0; i < 4; ++i) mbmi->interp_filter[i] = BILINEAR; -#else - mbmi->interp_filter = BILINEAR; -#endif - - y_sad = av1_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col); - - if (y_sad_g < y_sad) { - av1_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, - &cm->frame_refs[GOLDEN_FRAME - 1].sf); - mbmi->ref_frame[0] = GOLDEN_FRAME; - mbmi->mv[0].as_int = 0; - y_sad = y_sad_g; - } else { - x->pred_mv[LAST_FRAME] = mbmi->mv[0].as_mv; - } - - av1_build_inter_predictors_sb(xd, mi_row, mi_col, NULL, cm->sb_size); - - ref = xd->plane[0].dst.buf; - ref_stride = xd->plane[0].dst.stride; - - // If the y_sad is very small, take the largest partition and exit. - // Don't check on boosted segment for now, as largest is suppressed there. - if (segment_id == CR_SEGMENT_ID_BASE && y_sad < cpi->vbp_threshold_sad) { - if (!split_vert && !split_horz) { - set_block_size(cpi, x, xd, mi_row, mi_col, cm->sb_size); - return; - } - } - } else { - ref = AV1_VAR_OFFS; - ref_stride = 0; -#if CONFIG_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - switch (xd->bd) { - case 10: ref = CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_10); break; - case 12: ref = CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_12); break; - case 8: - default: ref = CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_8); break; - } - } -#endif // CONFIG_HIGHBITDEPTH - } - - init_variance_tree( - vt, -#if CONFIG_HIGHBITDEPTH - xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, -#endif // CONFIG_HIGHBITDEPTH - cm->sb_size, (is_key_frame || low_res) ? BLOCK_4X4 : BLOCK_8X8, - pixels_wide, pixels_high, src, src_stride, ref, ref_stride); - - // Fill in the entire tree of variances and compute splits. 
- if (is_key_frame) { - fill_variance_tree(vt, BLOCK_4X4); - check_split_key_frame(vt, thre[1]); - } else { - fill_variance_tree(vt, BLOCK_8X8); - check_split(cpi, vt, segment_id, thre); - if (low_res) { - refine_variance_tree(vt, thre[1] << 1); - } - } - - vt->force_split |= mi_col + cm->mib_size > cm->mi_cols || - mi_row + cm->mib_size > cm->mi_rows; - - // Now go through the entire structure, splitting every block size until - // we get to one that's got a variance lower than our threshold. - set_vt_partitioning(cpi, x, xd, vt, mi_row, mi_col, thre, bmin); -} - #if CONFIG_DUAL_FILTER static void reset_intmv_filter_type(const AV1_COMMON *const cm, MACROBLOCKD *xd, MB_MODE_INFO *mbmi) { @@ -1067,7 +508,6 @@ static void reset_tx_size(MACROBLOCKD *xd, MB_MODE_INFO *mbmi, } } -#if CONFIG_REF_MV static void set_ref_and_pred_mvs(MACROBLOCK *const x, int_mv *const mi_pred_mv, int8_t rf_type) { MACROBLOCKD *const xd = &x->e_mbd; @@ -1116,7 +556,6 @@ static void set_ref_and_pred_mvs(MACROBLOCK *const x, int_mv *const mi_pred_mv, } #endif // CONFIG_EXT_INTER } -#endif // CONFIG_REF_MV static void update_state(const AV1_COMP *const cpi, ThreadData *td, PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col, @@ -1144,9 +583,7 @@ static void update_state(const AV1_COMP *const cpi, ThreadData *td, const int mi_height = mi_size_high[bsize]; const int unify_bsize = CONFIG_CB4X4; -#if CONFIG_REF_MV int8_t rf_type; -#endif #if !CONFIG_SUPERTX assert(mi->mbmi.sb_type == bsize); @@ -1159,13 +596,11 @@ static void update_state(const AV1_COMP *const cpi, ThreadData *td, reset_intmv_filter_type(cm, xd, mbmi); #endif -#if CONFIG_REF_MV rf_type = av1_ref_frame_type(mbmi->ref_frame); if (x->mbmi_ext->ref_mv_count[rf_type] > 1 && (mbmi->sb_type >= BLOCK_8X8 || unify_bsize)) { set_ref_and_pred_mvs(x, mi->mbmi.pred_mv, rf_type); } -#endif // CONFIG_REF_MV // If segmentation in use if (seg->enabled) { @@ -1250,7 +685,11 @@ static void update_state(const AV1_COMP *const cpi, ThreadData *td, THR_D63_PRED /*D63_PRED*/, #if CONFIG_ALT_INTRA THR_SMOOTH, /*SMOOTH_PRED*/ -#endif // CONFIG_ALT_INTRA +#if CONFIG_SMOOTH_HV + THR_SMOOTH_V, /*SMOOTH_V_PRED*/ + THR_SMOOTH_H, /*SMOOTH_H_PRED*/ +#endif // CONFIG_SMOOTH_HV +#endif // CONFIG_ALT_INTRA THR_TM /*TM_PRED*/, }; ++mode_chosen_counts[kf_mode_index[mbmi->mode]]; @@ -1339,9 +778,7 @@ static void update_state_supertx(const AV1_COMP *const cpi, ThreadData *td, MV_REF *const frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col; int w, h; -#if CONFIG_REF_MV int8_t rf_type; -#endif *mi_addr = *mi; *x->mbmi_ext = ctx->mbmi_ext; @@ -1352,13 +789,11 @@ static void update_state_supertx(const AV1_COMP *const cpi, ThreadData *td, reset_intmv_filter_type(cm, xd, mbmi); #endif -#if CONFIG_REF_MV rf_type = av1_ref_frame_type(mbmi->ref_frame); if (x->mbmi_ext->ref_mv_count[rf_type] > 1 && (mbmi->sb_type >= BLOCK_8X8 || unify_bsize)) { set_ref_and_pred_mvs(x, mi->mbmi.pred_mv, rf_type); } -#endif // CONFIG_REF_MV // If segmentation in use if (seg->enabled) { @@ -1846,6 +1281,29 @@ static int set_segment_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x, return av1_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q); } +#if CONFIG_DAALA_DIST && CONFIG_CB4X4 +static void daala_dist_set_sub8x8_dst(MACROBLOCK *const x, uint8_t *dst8x8, + BLOCK_SIZE bsize, int bw, int bh, + int mi_row, int mi_col) { + MACROBLOCKD *const xd = &x->e_mbd; + struct macroblockd_plane *const pd = &xd->plane[0]; + const int dst_stride = pd->dst.stride; + uint8_t *dst = pd->dst.buf; + + assert(bsize < 
BLOCK_8X8); + + if (bsize < BLOCK_8X8) { + int i, j; + uint8_t *dst_sub8x8 = &dst8x8[((mi_row & 1) * 8 + (mi_col & 1)) << 2]; + + for (j = 0; j < bh; ++j) + for (i = 0; i < bw; ++i) { + dst_sub8x8[j * 8 + i] = dst[j * dst_stride + i]; + } + } +} +#endif + static void rd_pick_sb_modes(const AV1_COMP *const cpi, TileDataEnc *tile_data, MACROBLOCK *const x, int mi_row, int mi_col, RD_STATS *rd_cost, @@ -1865,7 +1323,6 @@ static void rd_pick_sb_modes(const AV1_COMP *const cpi, TileDataEnc *tile_data, struct macroblockd_plane *const pd = xd->plane; const AQ_MODE aq_mode = cpi->oxcf.aq_mode; int i, orig_rdmult; - const int unify_bsize = CONFIG_CB4X4; aom_clear_system_state(); @@ -1915,7 +1372,6 @@ static void rd_pick_sb_modes(const AV1_COMP *const cpi, TileDataEnc *tile_data, #endif // CONFIG_PALETTE ctx->skippable = 0; - ctx->pred_pixel_ready = 0; // Set to zero to make sure we do not use the previous encoded frame stats mbmi->skip = 0; @@ -1967,38 +1423,21 @@ static void rd_pick_sb_modes(const AV1_COMP *const cpi, TileDataEnc *tile_data, *totalrate_nocoef = 0; #endif // CONFIG_SUPERTX } else { - if (bsize >= BLOCK_8X8 || unify_bsize) { - if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { - av1_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, mi_row, mi_col, - rd_cost, bsize, ctx, best_rd); + if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { + av1_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, mi_row, mi_col, + rd_cost, bsize, ctx, best_rd); #if CONFIG_SUPERTX - *totalrate_nocoef = rd_cost->rate; + *totalrate_nocoef = rd_cost->rate; #endif // CONFIG_SUPERTX - } else { - av1_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost, -#if CONFIG_SUPERTX - totalrate_nocoef, -#endif // CONFIG_SUPERTX - bsize, ctx, best_rd); -#if CONFIG_SUPERTX - assert(*totalrate_nocoef >= 0); -#endif // CONFIG_SUPERTX - } } else { - if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { - // The decoder rejects sub8x8 partitions when SEG_LVL_SKIP is set. 
- rd_cost->rate = INT_MAX; - } else { - av1_rd_pick_inter_mode_sub8x8(cpi, tile_data, x, mi_row, mi_col, - rd_cost, + av1_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost, #if CONFIG_SUPERTX - totalrate_nocoef, + totalrate_nocoef, #endif // CONFIG_SUPERTX - bsize, ctx, best_rd); + bsize, ctx, best_rd); #if CONFIG_SUPERTX - assert(*totalrate_nocoef >= 0); + assert(*totalrate_nocoef >= 0); #endif // CONFIG_SUPERTX - } } } @@ -2020,7 +1459,6 @@ static void rd_pick_sb_modes(const AV1_COMP *const cpi, TileDataEnc *tile_data, ctx->dist = rd_cost->dist; } -#if CONFIG_REF_MV static void update_inter_mode_stats(FRAME_COUNTS *counts, PREDICTION_MODE mode, int16_t mode_context) { int16_t mode_ctx = mode_context & NEWMV_CTX_MASK; @@ -2050,7 +1488,6 @@ static void update_inter_mode_stats(FRAME_COUNTS *counts, PREDICTION_MODE mode, } } } -#endif static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row, int mi_col @@ -2070,7 +1507,6 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row, const MB_MODE_INFO *const mbmi = &mi->mbmi; const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; const BLOCK_SIZE bsize = mbmi->sb_type; - const int unify_bsize = CONFIG_CB4X4; #if CONFIG_DELTA_Q // delta quant applies to both intra and inter @@ -2125,7 +1561,7 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row, if (cm->reference_mode == REFERENCE_MODE_SELECT) { #if !SUB8X8_COMP_REF - if (mbmi->sb_type >= BLOCK_8X8) + if (mbmi->sb_type != BLOCK_4X4) counts->comp_inter[av1_get_reference_mode_context(cm, xd)] [has_second_ref(mbmi)]++; #else @@ -2183,12 +1619,12 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row, #endif // CONFIG_EXT_REFS } -#if CONFIG_EXT_INTER +#if CONFIG_EXT_INTER && CONFIG_INTERINTRA if (cm->reference_mode != COMPOUND_REFERENCE && #if CONFIG_SUPERTX !supertx_enabled && #endif - is_interintra_allowed(mbmi)) { + cm->allow_interintra_compound && is_interintra_allowed(mbmi)) { const int bsize_group = size_group_lookup[bsize]; if (mbmi->ref_frame[1] == INTRA_FRAME) { counts->interintra[bsize_group][1]++; @@ -2199,7 +1635,7 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row, counts->interintra[bsize_group][0]++; } } -#endif // CONFIG_EXT_INTER +#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION const MOTION_MODE motion_allowed = motion_mode_allowed( @@ -2242,105 +1678,67 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row, if (inter_block && !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { int16_t mode_ctx; -#if !CONFIG_REF_MV - mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]]; -#endif - if (bsize >= BLOCK_8X8 || unify_bsize) { - const PREDICTION_MODE mode = mbmi->mode; -#if CONFIG_REF_MV + const PREDICTION_MODE mode = mbmi->mode; #if CONFIG_EXT_INTER - if (has_second_ref(mbmi)) { - mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]]; - ++counts->inter_compound_mode[mode_ctx][INTER_COMPOUND_OFFSET(mode)]; - } else { + if (has_second_ref(mbmi)) { + mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]]; + ++counts->inter_compound_mode[mode_ctx][INTER_COMPOUND_OFFSET(mode)]; + } else { #endif // CONFIG_EXT_INTER - mode_ctx = av1_mode_context_analyzer(mbmi_ext->mode_context, - mbmi->ref_frame, bsize, -1); - update_inter_mode_stats(counts, mode, mode_ctx); + mode_ctx = av1_mode_context_analyzer(mbmi_ext->mode_context, + mbmi->ref_frame, 
bsize, -1); + update_inter_mode_stats(counts, mode, mode_ctx); #if CONFIG_EXT_INTER - } + } #endif // CONFIG_EXT_INTER #if CONFIG_EXT_INTER - if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) { + if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) { #else - if (mbmi->mode == NEWMV) { + if (mbmi->mode == NEWMV) { #endif - uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame); - int idx; + uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame); + int idx; - for (idx = 0; idx < 2; ++idx) { - if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) { - uint8_t drl_ctx = - av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx); - ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx]; + for (idx = 0; idx < 2; ++idx) { + if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) { + uint8_t drl_ctx = + av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx); + ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx]; - if (mbmi->ref_mv_idx == idx) break; - } + if (mbmi->ref_mv_idx == idx) break; } } + } #if CONFIG_EXT_INTER - if (have_nearmv_in_inter_mode(mbmi->mode)) { + if (have_nearmv_in_inter_mode(mbmi->mode)) { #else - if (mbmi->mode == NEARMV) { + if (mbmi->mode == NEARMV) { #endif - uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame); - int idx; + uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame); + int idx; - for (idx = 1; idx < 3; ++idx) { - if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) { - uint8_t drl_ctx = - av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx); - ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx - 1]; + for (idx = 1; idx < 3; ++idx) { + if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) { + uint8_t drl_ctx = + av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx); + ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx - 1]; - if (mbmi->ref_mv_idx == idx - 1) break; - } - } - } -#else -#if CONFIG_EXT_INTER - if (is_inter_compound_mode(mode)) - ++counts->inter_compound_mode[mode_ctx][INTER_COMPOUND_OFFSET(mode)]; - else -#endif // CONFIG_EXT_INTER - ++counts->inter_mode[mode_ctx][INTER_OFFSET(mode)]; -#endif - } else { - const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; - const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; - int idx, idy; - for (idy = 0; idy < 2; idy += num_4x4_h) { - for (idx = 0; idx < 2; idx += num_4x4_w) { - const int j = idy * 2 + idx; - const PREDICTION_MODE b_mode = mi->bmi[j].as_mode; -#if CONFIG_REF_MV -#if CONFIG_EXT_INTER - if (has_second_ref(mbmi)) { - mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]]; - ++counts->inter_compound_mode[mode_ctx] - [INTER_COMPOUND_OFFSET(b_mode)]; - } else { -#endif // CONFIG_EXT_INTER - mode_ctx = av1_mode_context_analyzer(mbmi_ext->mode_context, - mbmi->ref_frame, bsize, j); - update_inter_mode_stats(counts, b_mode, mode_ctx); -#if CONFIG_EXT_INTER - } -#endif // CONFIG_EXT_INTER -#else -#if CONFIG_EXT_INTER - if (is_inter_compound_mode(b_mode)) - ++counts->inter_compound_mode[mode_ctx] - [INTER_COMPOUND_OFFSET(b_mode)]; - else -#endif // CONFIG_EXT_INTER - ++counts->inter_mode[mode_ctx][INTER_OFFSET(b_mode)]; -#endif + if (mbmi->ref_mv_idx == idx - 1) break; } } } } +#if CONFIG_INTRABC + } else { + if (cm->allow_screen_content_tools && bsize >= BLOCK_8X8) { + FRAME_COUNTS *const counts = td->counts; + ++counts->intrabc[mbmi->use_intrabc]; + } else { + assert(!mbmi->use_intrabc); + } +#endif } } @@ -2352,8 +1750,8 @@ typedef struct { #if CONFIG_VAR_TX TXFM_CONTEXT *p_ta; TXFM_CONTEXT *p_tl; - TXFM_CONTEXT ta[MAX_MIB_SIZE]; - TXFM_CONTEXT 
tl[MAX_MIB_SIZE]; + TXFM_CONTEXT ta[2 * MAX_MIB_SIZE]; + TXFM_CONTEXT tl[2 * MAX_MIB_SIZE]; #endif } RD_SEARCH_MACROBLOCK_CONTEXT; @@ -2373,12 +1771,15 @@ static void restore_context(MACROBLOCK *x, int mi_width = mi_size_wide[bsize]; int mi_height = mi_size_high[bsize]; for (p = 0; p < MAX_MB_PLANE; p++) { - memcpy(xd->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x), + int tx_col; + int tx_row; + tx_col = mi_col << (MI_SIZE_LOG2 - tx_size_wide_log2[0]); + tx_row = (mi_row & MAX_MIB_MASK) << (MI_SIZE_LOG2 - tx_size_high_log2[0]); + memcpy(xd->above_context[p] + (tx_col >> xd->plane[p].subsampling_x), ctx->a + num_4x4_blocks_wide * p, (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> xd->plane[p].subsampling_x); - memcpy(xd->left_context[p] + - ((mi_row & MAX_MIB_MASK) * 2 >> xd->plane[p].subsampling_y), + memcpy(xd->left_context[p] + (tx_row >> xd->plane[p].subsampling_y), ctx->l + num_4x4_blocks_high * p, (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> xd->plane[p].subsampling_y); @@ -2391,9 +1792,9 @@ static void restore_context(MACROBLOCK *x, xd->above_txfm_context = ctx->p_ta; xd->left_txfm_context = ctx->p_tl; memcpy(xd->above_txfm_context, ctx->ta, - sizeof(*xd->above_txfm_context) * mi_width); + sizeof(*xd->above_txfm_context) * (mi_width << TX_UNIT_WIDE_LOG2)); memcpy(xd->left_txfm_context, ctx->tl, - sizeof(*xd->left_txfm_context) * mi_height); + sizeof(*xd->left_txfm_context) * (mi_height << TX_UNIT_HIGH_LOG2)); #endif #if CONFIG_PVQ od_encode_rollback(&x->daala_enc, rdo_buf); @@ -2417,13 +1818,16 @@ static void save_context(const MACROBLOCK *x, RD_SEARCH_MACROBLOCK_CONTEXT *ctx, // buffer the above/left context information of the block in search. for (p = 0; p < MAX_MB_PLANE; ++p) { + int tx_col; + int tx_row; + tx_col = mi_col << (MI_SIZE_LOG2 - tx_size_wide_log2[0]); + tx_row = (mi_row & MAX_MIB_MASK) << (MI_SIZE_LOG2 - tx_size_high_log2[0]); memcpy(ctx->a + num_4x4_blocks_wide * p, - xd->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x), + xd->above_context[p] + (tx_col >> xd->plane[p].subsampling_x), (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> xd->plane[p].subsampling_x); memcpy(ctx->l + num_4x4_blocks_high * p, - xd->left_context[p] + - ((mi_row & MAX_MIB_MASK) * 2 >> xd->plane[p].subsampling_y), + xd->left_context[p] + (tx_row >> xd->plane[p].subsampling_y), (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> xd->plane[p].subsampling_y); } @@ -2433,9 +1837,9 @@ static void save_context(const MACROBLOCK *x, RD_SEARCH_MACROBLOCK_CONTEXT *ctx, sizeof(xd->left_seg_context[0]) * mi_height); #if CONFIG_VAR_TX memcpy(ctx->ta, xd->above_txfm_context, - sizeof(*xd->above_txfm_context) * mi_width); + sizeof(*xd->above_txfm_context) * (mi_width << TX_UNIT_WIDE_LOG2)); memcpy(ctx->tl, xd->left_txfm_context, - sizeof(*xd->left_txfm_context) * mi_height); + sizeof(*xd->left_txfm_context) * (mi_height << TX_UNIT_HIGH_LOG2)); ctx->p_ta = xd->above_txfm_context; ctx->p_tl = xd->left_txfm_context; #endif @@ -2479,7 +1883,7 @@ static void encode_b(const AV1_COMP *const cpi, const TileInfo *const tile, get_frame_new_buffer(&cpi->common), mi_row, mi_col); } #endif - encode_superblock(cpi, td, tp, dry_run, mi_row, mi_col, bsize, ctx, rate); + encode_superblock(cpi, td, tp, dry_run, mi_row, mi_col, bsize, rate); if (!dry_run) { #if CONFIG_EXT_DELTA_Q @@ -2563,12 +1967,13 @@ static void encode_sb(const AV1_COMP *const cpi, ThreadData *td, if (!x->skip) { int this_rate = 0; av1_encode_sb_supertx((AV1_COMMON *)cm, x, bsize); - av1_tokenize_sb_supertx(cpi, td, 
tp, dry_run, bsize, rate); + av1_tokenize_sb_supertx(cpi, td, tp, dry_run, mi_row, mi_col, bsize, + rate); if (rate) *rate += this_rate; } else { xd->mi[0]->mbmi.skip = 1; if (!dry_run) td->counts->skip[av1_get_skip_context(xd)][1]++; - reset_skip_context(xd, bsize); + av1_reset_skip_context(xd, mi_row, mi_col, bsize); } if (!dry_run) { for (y_idx = 0; y_idx < mi_height; y_idx++) @@ -2849,9 +2254,10 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td, pc_tree->partitioning = partition; #if CONFIG_VAR_TX - xd->above_txfm_context = cm->above_txfm_context + mi_col; - xd->left_txfm_context = - xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); + xd->above_txfm_context = + cm->above_txfm_context + (mi_col << TX_UNIT_WIDE_LOG2); + xd->left_txfm_context = xd->left_txfm_context_buffer + + ((mi_row & MAX_MIB_MASK) << TX_UNIT_HIGH_LOG2); #endif #if !CONFIG_PVQ save_context(x, &x_ctx, mi_row, mi_col, bsize); @@ -2943,7 +2349,7 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td, av1_init_rd_stats(&tmp_rdc); update_state(cpi, td, ctx_h, mi_row, mi_col, subsize, 1); encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, subsize, - ctx_h, NULL); + NULL); rd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &tmp_rdc, #if CONFIG_SUPERTX &rt_nocoef, @@ -2986,7 +2392,7 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td, av1_init_rd_stats(&tmp_rdc); update_state(cpi, td, ctx_v, mi_row, mi_col, subsize, 1); encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, subsize, - ctx_v, NULL); + NULL); rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &tmp_rdc, #if CONFIG_SUPERTX &rt_nocoef, @@ -3566,7 +2972,7 @@ static void rd_test_partition3( PICK_MODE_CONTEXT *ctx_0 = &ctxs[0]; update_state(cpi, td, ctx_0, mi_row0, mi_col0, subsize0, 1); encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row0, mi_col0, subsize0, - ctx_0, NULL); + NULL); if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_0); @@ -3607,7 +3013,7 @@ static void rd_test_partition3( PICK_MODE_CONTEXT *ctx_1 = &ctxs[1]; update_state(cpi, td, ctx_1, mi_row1, mi_col1, subsize1, 1); encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row1, mi_col1, subsize1, - ctx_1, NULL); + NULL); if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_1); @@ -3865,9 +3271,10 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, } #if CONFIG_VAR_TX - xd->above_txfm_context = cm->above_txfm_context + mi_col; - xd->left_txfm_context = - xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); + xd->above_txfm_context = + cm->above_txfm_context + (mi_col << TX_UNIT_WIDE_LOG2); + xd->left_txfm_context = xd->left_txfm_context_buffer + + ((mi_row & MAX_MIB_MASK) << TX_UNIT_HIGH_LOG2); #endif #if !CONFIG_PVQ save_context(x, &x_ctx, mi_row, mi_col, bsize); @@ -4157,9 +3564,29 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, #if CONFIG_SUPERTX sum_rate_nocoef += this_rate_nocoef; #endif // CONFIG_SUPERTX +#if CONFIG_DAALA_DIST && CONFIG_CB4X4 + sum_rdc.dist_y += this_rdc.dist_y; +#endif } } reached_last_index = (idx == 4); + +#if CONFIG_DAALA_DIST && CONFIG_CB4X4 + if (reached_last_index && sum_rdc.rdcost != INT64_MAX && + bsize == BLOCK_8X8) { + int use_activity_masking = 0; + int64_t daala_dist; + const int src_stride = x->plane[0].src.stride; + daala_dist = av1_daala_dist(x->plane[0].src.buf - 4 * src_stride - 4, + src_stride, x->decoded_8x8, 8, 8, 8, 1, + use_activity_masking, x->qindex) + << 4; + sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + daala_dist; + 
sum_rdc.rdcost = + RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + } +#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4 + #if CONFIG_SUPERTX if (supertx_allowed && sum_rdc.rdcost < INT64_MAX && reached_last_index) { TX_SIZE supertx_size = max_txsize_lookup[bsize]; @@ -4267,7 +3694,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, PICK_MODE_CONTEXT *ctx_h = &pc_tree->horizontal[0]; update_state(cpi, td, ctx_h, mi_row, mi_col, subsize, 1); encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, subsize, - ctx_h, NULL); + NULL); if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_h); @@ -4297,6 +3724,16 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, subsize, &pc_tree->horizontal[1], best_rdc.rdcost - sum_rdc.rdcost); #endif // CONFIG_SUPERTX + +#if CONFIG_DAALA_DIST && CONFIG_CB4X4 + if (this_rdc.rate != INT_MAX && bsize == BLOCK_8X8) { + update_state(cpi, td, &pc_tree->horizontal[1], mi_row + mi_step, mi_col, + subsize, DRY_RUN_NORMAL); + encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row + mi_step, mi_col, + subsize, NULL); + } +#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4 + if (this_rdc.rate == INT_MAX) { sum_rdc.rdcost = INT64_MAX; #if CONFIG_SUPERTX @@ -4309,7 +3746,24 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, #if CONFIG_SUPERTX sum_rate_nocoef += this_rate_nocoef; #endif // CONFIG_SUPERTX +#if CONFIG_DAALA_DIST && CONFIG_CB4X4 + sum_rdc.dist_y += this_rdc.dist_y; +#endif } +#if CONFIG_DAALA_DIST && CONFIG_CB4X4 + if (sum_rdc.rdcost != INT64_MAX && bsize == BLOCK_8X8) { + int use_activity_masking = 0; + int64_t daala_dist; + const int src_stride = x->plane[0].src.stride; + daala_dist = av1_daala_dist(x->plane[0].src.buf - 4 * src_stride, + src_stride, x->decoded_8x8, 8, 8, 8, 1, + use_activity_masking, x->qindex) + << 4; + sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + daala_dist; + sum_rdc.rdcost = + RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + } +#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4 } #if CONFIG_SUPERTX @@ -4413,7 +3867,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, !force_vert_split && (bsize > BLOCK_8X8 || unify_bsize)) { update_state(cpi, td, &pc_tree->vertical[0], mi_row, mi_col, subsize, 1); encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, subsize, - &pc_tree->vertical[0], NULL); + NULL); if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none); @@ -4444,6 +3898,16 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, subsize, &pc_tree->vertical[1], best_rdc.rdcost - sum_rdc.rdcost); #endif // CONFIG_SUPERTX + +#if CONFIG_DAALA_DIST && CONFIG_CB4X4 + if (this_rdc.rate != INT_MAX && bsize == BLOCK_8X8) { + update_state(cpi, td, &pc_tree->vertical[1], mi_row, mi_col + mi_step, + subsize, DRY_RUN_NORMAL); + encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col + mi_step, + subsize, NULL); + } +#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4 + if (this_rdc.rate == INT_MAX) { sum_rdc.rdcost = INT64_MAX; #if CONFIG_SUPERTX @@ -4456,7 +3920,24 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, #if CONFIG_SUPERTX sum_rate_nocoef += this_rate_nocoef; #endif // CONFIG_SUPERTX +#if CONFIG_DAALA_DIST && CONFIG_CB4X4 + sum_rdc.dist_y += this_rdc.dist_y; +#endif } +#if CONFIG_DAALA_DIST && CONFIG_CB4X4 + if (sum_rdc.rdcost != INT64_MAX && bsize == BLOCK_8X8) { + int use_activity_masking = 0; + int64_t daala_dist; + const int src_stride = x->plane[0].src.stride; + 
daala_dist = + av1_daala_dist(x->plane[0].src.buf - 4, src_stride, x->decoded_8x8, + 8, 8, 8, 1, use_activity_masking, x->qindex) + << 4; + sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + daala_dist; + sum_rdc.rdcost = + RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + } +#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4 } #if CONFIG_SUPERTX if (supertx_allowed && sum_rdc.rdcost < INT64_MAX && !abort_flag) { @@ -4612,6 +4093,14 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, x->cfl_store_y = 0; #endif +#if CONFIG_DAALA_DIST && CONFIG_CB4X4 + if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX && + bsize == BLOCK_4X4 && pc_tree->index == 3) { + encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize, + pc_tree, NULL); + } +#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4 + if (bsize == cm->sb_size) { #if !CONFIG_PVQ && !CONFIG_LV_MAP assert(tp_orig < *tp || (tp_orig == *tp && xd->mi[0]->mbmi.skip)); @@ -4762,14 +4251,6 @@ static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td, &dummy_rate_nocoef, #endif // CONFIG_SUPERTX 1, pc_root); - } else if (sf->partition_search_type == VAR_BASED_PARTITION) { - choose_partitioning(cpi, td, tile_info, x, mi_row, mi_col); - rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, cm->sb_size, - &dummy_rate, &dummy_dist, -#if CONFIG_SUPERTX - &dummy_rate_nocoef, -#endif // CONFIG_SUPERTX - 1, pc_root); } else { // If required set upper and lower partition size limits if (sf->auto_min_max_partition_size) { @@ -4785,32 +4266,6 @@ static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td, INT64_MAX, pc_root); } } -#if CONFIG_SUBFRAME_PROB_UPDATE - if (cm->do_subframe_update && - cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) { - const int mi_rows_per_update = - MI_SIZE * AOMMAX(cm->mi_rows / MI_SIZE / COEF_PROBS_BUFS, 1); - if ((mi_row + MI_SIZE) % mi_rows_per_update == 0 && - mi_row + MI_SIZE < cm->mi_rows && - cm->coef_probs_update_idx < COEF_PROBS_BUFS - 1) { - TX_SIZE t; - SUBFRAME_STATS *subframe_stats = &cpi->subframe_stats; - - for (t = 0; t < TX_SIZES; ++t) - av1_full_to_model_counts(cpi->td.counts->coef[t], - cpi->td.rd_counts.coef_counts[t]); - av1_partial_adapt_probs(cm, mi_row, mi_col); - ++cm->coef_probs_update_idx; - av1_copy(subframe_stats->coef_probs_buf[cm->coef_probs_update_idx], - cm->fc->coef_probs); - av1_copy(subframe_stats->coef_counts_buf[cm->coef_probs_update_idx], - cpi->td.rd_counts.coef_counts); - av1_copy(subframe_stats->eob_counts_buf[cm->coef_probs_update_idx], - cm->counts.eob_branch); - av1_fill_token_costs(x->token_costs, cm->fc->coef_probs); - } - } -#endif // CONFIG_SUBFRAME_PROB_UPDATE } static void init_encode_frame_mb_context(AV1_COMP *cpi) { @@ -5041,16 +4496,11 @@ void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row, } } -#if CONFIG_DAALA_EC +#if !CONFIG_ANS od_ec_enc_init(&td->mb.daala_enc.w.ec, 65025); -#else -#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC." -#endif - -#if CONFIG_DAALA_EC od_ec_enc_reset(&td->mb.daala_enc.w.ec); #else -#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC." +#error "CONFIG_PVQ currently requires !CONFIG_ANS." 
#endif #endif // #if CONFIG_PVQ @@ -5079,10 +4529,10 @@ void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row, (unsigned int)(tok - cpi->tile_tok[tile_row][tile_col]); assert(cpi->tok_count[tile_row][tile_col] <= allocated_tokens(*tile_info)); #if CONFIG_PVQ -#if CONFIG_DAALA_EC +#if !CONFIG_ANS od_ec_enc_clear(&td->mb.daala_enc.w.ec); #else -#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC." +#error "CONFIG_PVQ currently requires !CONFIG_ANS." #endif td->mb.pvq_q->last_pos = td->mb.pvq_q->curr_pos; @@ -5186,6 +4636,24 @@ static int gm_get_params_cost(WarpedMotionParams *gm, } return (params_cost << AV1_PROB_COST_SHIFT); } + +static int do_gm_search_logic(SPEED_FEATURES *const sf, int num_refs_using_gm, + int frame) { + (void)num_refs_using_gm; + (void)frame; + switch (sf->gm_search_type) { + case GM_FULL_SEARCH: return 1; + case GM_REDUCED_REF_SEARCH: +#if CONFIG_EXT_REFS + return !(frame == LAST2_FRAME || frame == LAST3_FRAME); +#else + return (num_refs_using_gm < 2); +#endif // CONFIG_EXT_REFS + case GM_DISABLE_SEARCH: return 0; + default: assert(0); + } + return 1; +} #endif // CONFIG_GLOBAL_MOTION static void encode_frame_internal(AV1_COMP *cpi) { @@ -5205,9 +4673,7 @@ static void encode_frame_internal(AV1_COMP *cpi) { x->min_partition_size = AOMMIN(x->min_partition_size, cm->sb_size); x->max_partition_size = AOMMIN(x->max_partition_size, cm->sb_size); -#if CONFIG_REF_MV cm->setup_mi(cm); -#endif xd->mi = cm->mi_grid_visible; xd->mi[0] = cm->mi; @@ -5218,27 +4684,46 @@ static void encode_frame_internal(AV1_COMP *cpi) { #if CONFIG_GLOBAL_MOTION av1_zero(rdc->global_motion_used); + av1_zero(cpi->gmparams_cost); if (cpi->common.frame_type == INTER_FRAME && cpi->source && !cpi->global_motion_search_done) { - YV12_BUFFER_CONFIG *ref_buf; + YV12_BUFFER_CONFIG *ref_buf[TOTAL_REFS_PER_FRAME]; int frame; double params_by_motion[RANSAC_NUM_MOTIONS * (MAX_PARAMDIM - 1)]; const double *params_this_motion; int inliers_by_motion[RANSAC_NUM_MOTIONS]; WarpedMotionParams tmp_wm_params; - static const double kInfiniteErrAdv = 1e12; static const double kIdentityParams[MAX_PARAMDIM - 1] = { 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0 }; + int num_refs_using_gm = 0; for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) { - ref_buf = get_ref_frame_buffer(cpi, frame); - if (ref_buf) { + ref_buf[frame] = get_ref_frame_buffer(cpi, frame); + int pframe; + // check for duplicate buffer + for (pframe = LAST_FRAME; pframe < frame; ++pframe) { + if (ref_buf[frame] == ref_buf[pframe]) break; + } + if (pframe < frame) { + memcpy(&cm->global_motion[frame], &cm->global_motion[pframe], + sizeof(WarpedMotionParams)); + } else if (ref_buf[frame] && + do_gm_search_logic(&cpi->sf, num_refs_using_gm, frame)) { TransformationType model; + const int64_t ref_frame_error = av1_frame_error( +#if CONFIG_HIGHBITDEPTH + xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd, +#endif // CONFIG_HIGHBITDEPTH + ref_buf[frame]->y_buffer, ref_buf[frame]->y_stride, + cpi->source->y_buffer, 0, 0, cpi->source->y_width, + cpi->source->y_height, cpi->source->y_stride); + + if (ref_frame_error == 0) continue; + aom_clear_system_state(); for (model = ROTZOOM; model < GLOBAL_TRANS_TYPES_ENC; ++model) { - double best_erroradvantage = kInfiniteErrAdv; - + int64_t best_warp_error = INT64_MAX; // Initially set all params to identity. 
for (i = 0; i < RANSAC_NUM_MOTIONS; ++i) { memcpy(params_by_motion + (MAX_PARAMDIM - 1) * i, kIdentityParams, @@ -5246,7 +4731,7 @@ static void encode_frame_internal(AV1_COMP *cpi) { } compute_global_motion_feature_based( - model, cpi->source, ref_buf, + model, cpi->source, ref_buf[frame], #if CONFIG_HIGHBITDEPTH cpi->common.bit_depth, #endif // CONFIG_HIGHBITDEPTH @@ -5259,17 +4744,17 @@ static void encode_frame_internal(AV1_COMP *cpi) { convert_model_to_params(params_this_motion, &tmp_wm_params); if (tmp_wm_params.wmtype != IDENTITY) { - const double erroradv_this_motion = refine_integerized_param( + const int64_t warp_error = refine_integerized_param( &tmp_wm_params, tmp_wm_params.wmtype, #if CONFIG_HIGHBITDEPTH xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd, #endif // CONFIG_HIGHBITDEPTH - ref_buf->y_buffer, ref_buf->y_width, ref_buf->y_height, - ref_buf->y_stride, cpi->source->y_buffer, - cpi->source->y_width, cpi->source->y_height, - cpi->source->y_stride, 3); - if (erroradv_this_motion < best_erroradvantage) { - best_erroradvantage = erroradv_this_motion; + ref_buf[frame]->y_buffer, ref_buf[frame]->y_width, + ref_buf[frame]->y_height, ref_buf[frame]->y_stride, + cpi->source->y_buffer, cpi->source->y_width, + cpi->source->y_height, cpi->source->y_stride, 3); + if (warp_error < best_warp_error) { + best_warp_error = warp_error; // Save the wm_params modified by refine_integerized_param() // rather than motion index to avoid rerunning refine() below. memcpy(&(cm->global_motion[frame]), &tmp_wm_params, @@ -5295,17 +4780,17 @@ static void encode_frame_internal(AV1_COMP *cpi) { // If the best error advantage found doesn't meet the threshold for // this motion type, revert to IDENTITY. if (!is_enough_erroradvantage( - best_erroradvantage, + (double)best_warp_error / ref_frame_error, gm_get_params_cost(&cm->global_motion[frame], &cm->prev_frame->global_motion[frame], cm->allow_high_precision_mv))) { set_default_warp_params(&cm->global_motion[frame]); } - if (cm->global_motion[frame].wmtype != IDENTITY) break; } aom_clear_system_state(); } + if (cm->global_motion[frame].wmtype != IDENTITY) num_refs_using_gm++; cpi->gmparams_cost[frame] = gm_get_params_cost(&cm->global_motion[frame], &cm->prev_frame->global_motion[frame], @@ -5352,21 +4837,8 @@ static void encode_frame_internal(AV1_COMP *cpi) { av1_initialize_rd_consts(cpi); av1_initialize_me_consts(cpi, x, cm->base_qindex); init_encode_frame_mb_context(cpi); -#if CONFIG_TEMPMV_SIGNALING - if (last_fb_buf_idx != INVALID_IDX) { - cm->prev_frame = &cm->buffer_pool->frame_bufs[last_fb_buf_idx]; - cm->use_prev_frame_mvs &= !cm->error_resilient_mode && - cm->width == cm->prev_frame->buf.y_width && - cm->height == cm->prev_frame->buf.y_height && - !cm->intra_only && !cm->prev_frame->intra_only; - } -#else - cm->use_prev_frame_mvs = - !cm->error_resilient_mode && cm->width == cm->last_width && - cm->height == cm->last_height && !cm->intra_only && cm->last_show_frame; -#endif -#if CONFIG_EXT_REFS +#if CONFIG_EXT_REFS || CONFIG_TEMPMV_SIGNALING // NOTE(zoeliu): As cm->prev_frame can take neither a frame of // show_exisiting_frame=1, nor can it take a frame not used as // a reference, it is probable that by the time it is being @@ -5377,11 +4849,29 @@ static void encode_frame_internal(AV1_COMP *cpi) { // (1) Simply disable the use of previous frame mvs; or // (2) Have cm->prev_frame point to one reference frame buffer, // e.g. LAST_FRAME. 
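
/*
 * The global-motion hunk above moves from a floating-point error-advantage
 * metric to integer warp error: every candidate model is refined, the lowest
 * warp error wins, and the winner is kept only if warp_error/ref_frame_error
 * clears the error-advantage check (duplicate reference buffers reuse the
 * parameters already computed, and do_gm_search_logic() may skip a reference
 * altogether). A condensed sketch of that selection follows; the struct, the
 * helper name and the bare threshold are simplified stand-ins, not the
 * encoder's actual API.
 */
#include <stdint.h>

typedef struct {
  int wmtype;          /* 0 stands in for IDENTITY */
  int64_t warp_error;  /* error returned by refine_integerized_param() */
} gm_candidate_sketch;

/* Returns the index of the accepted model, or -1 to keep IDENTITY. */
static int select_global_motion_sketch(const gm_candidate_sketch *cand,
                                       int num_candidates,
                                       int64_t ref_frame_error,
                                       double erroradv_threshold) {
  int64_t best_warp_error = INT64_MAX;
  int best = -1;
  if (ref_frame_error == 0) return -1; /* mirrors the early 'continue' above */
  for (int m = 0; m < num_candidates; ++m) {
    if (cand[m].wmtype != 0 && cand[m].warp_error < best_warp_error) {
      best_warp_error = cand[m].warp_error;
      best = m;
    }
  }
  /* Keep the warp only if it beats the unwarped reference by enough; the
   * real is_enough_erroradvantage() also folds in the parameter cost. */
  if (best >= 0 &&
      (double)best_warp_error / (double)ref_frame_error < erroradv_threshold)
    return best;
  return -1;
}
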
- if (cm->use_prev_frame_mvs && !enc_is_ref_frame_buf(cpi, cm->prev_frame)) { + if (!enc_is_ref_frame_buf(cpi, cm->prev_frame)) { // Reassign the LAST_FRAME buffer to cm->prev_frame. - cm->prev_frame = &cm->buffer_pool->frame_bufs[last_fb_buf_idx]; + cm->prev_frame = last_fb_buf_idx != INVALID_IDX + ? &cm->buffer_pool->frame_bufs[last_fb_buf_idx] + : NULL; } -#endif // CONFIG_EXT_REFS +#endif // CONFIG_EXT_REFS || CONFIG_TEMPMV_SIGNALING + +#if CONFIG_TEMPMV_SIGNALING + if (cm->prev_frame) { + cm->use_prev_frame_mvs &= !cm->error_resilient_mode && + cm->width == cm->prev_frame->buf.y_width && + cm->height == cm->prev_frame->buf.y_height && + !cm->intra_only && !cm->prev_frame->intra_only; + } else { + cm->use_prev_frame_mvs = 0; + } +#else + cm->use_prev_frame_mvs = !cm->error_resilient_mode && cm->prev_frame && + cm->width == cm->prev_frame->buf.y_crop_width && + cm->height == cm->prev_frame->buf.y_crop_height && + !cm->intra_only && cm->last_show_frame; +#endif // CONFIG_TEMPMV_SIGNALING // Special case: set prev_mi to NULL when the previous mode info // context cannot be used. @@ -5390,14 +4880,8 @@ static void encode_frame_internal(AV1_COMP *cpi) { #if CONFIG_VAR_TX x->txb_split_count = 0; -#if CONFIG_REF_MV av1_zero(x->blk_skip_drl); #endif -#endif - - if (cpi->sf.partition_search_type == VAR_BASED_PARTITION && - cpi->td.var_root[0] == NULL) - av1_setup_var_tree(&cpi->common, &cpi->td); { struct aom_usec_timer emr_timer; @@ -5429,6 +4913,20 @@ static void encode_frame_internal(AV1_COMP *cpi) { #endif } +#if CONFIG_EXT_INTER +static void make_consistent_compound_tools(AV1_COMMON *cm) { + (void)cm; +#if CONFIG_INTERINTRA + if (frame_is_intra_only(cm) || cm->reference_mode == COMPOUND_REFERENCE) + cm->allow_interintra_compound = 0; +#endif // CONFIG_INTERINTRA +#if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE + if (frame_is_intra_only(cm) || cm->reference_mode == SINGLE_REFERENCE) + cm->allow_masked_compound = 0; +#endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE +} +#endif // CONFIG_EXT_INTER + void av1_encode_frame(AV1_COMP *cpi) { AV1_COMMON *const cm = &cpi->common; #if CONFIG_EXT_TX @@ -5444,29 +4942,15 @@ void av1_encode_frame(AV1_COMP *cpi) { // side behavior is where the ALT ref buffer has opposite sign bias to // the other two. 
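
/*
 * The CONFIG_TEMPMV_SIGNALING hunk above first repoints cm->prev_frame at the
 * LAST_FRAME buffer (or clears it) when the current prev_frame is not a valid
 * reference, and only then decides whether previous-frame MVs may be used.
 * The resulting condition, pulled out into a small sketch with simplified
 * stand-in fields (not the real AV1_COMMON layout):
 */
struct prev_frame_sketch {
  int y_width, y_height;
  int intra_only;
};

static int can_use_prev_frame_mvs_sketch(const struct prev_frame_sketch *prev,
                                         int cur_width, int cur_height,
                                         int error_resilient,
                                         int cur_intra_only) {
  if (!prev) return 0; /* no usable previous frame buffer */
  return !error_resilient && !cur_intra_only && !prev->intra_only &&
         cur_width == prev->y_width && cur_height == prev->y_height;
}
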
if (!frame_is_intra_only(cm)) { -#if CONFIG_LOWDELAY_COMPOUND // Normative in encoder - cpi->allow_comp_inter_inter = 1; -#if CONFIG_EXT_REFS - cm->comp_fwd_ref[0] = LAST_FRAME; - cm->comp_fwd_ref[1] = LAST2_FRAME; - cm->comp_fwd_ref[2] = LAST3_FRAME; - cm->comp_fwd_ref[3] = GOLDEN_FRAME; - cm->comp_bwd_ref[0] = BWDREF_FRAME; - cm->comp_bwd_ref[1] = ALTREF_FRAME; -#else - cm->comp_fixed_ref = ALTREF_FRAME; - cm->comp_var_ref[0] = LAST_FRAME; - cm->comp_var_ref[1] = GOLDEN_FRAME; -#endif // CONFIG_EXT_REFS -#else +#if !CONFIG_ONE_SIDED_COMPOUND if ((cm->ref_frame_sign_bias[ALTREF_FRAME] == cm->ref_frame_sign_bias[GOLDEN_FRAME]) || (cm->ref_frame_sign_bias[ALTREF_FRAME] == cm->ref_frame_sign_bias[LAST_FRAME])) { cpi->allow_comp_inter_inter = 0; } else { +#endif cpi->allow_comp_inter_inter = 1; - #if CONFIG_EXT_REFS cm->comp_fwd_ref[0] = LAST_FRAME; cm->comp_fwd_ref[1] = LAST2_FRAME; @@ -5475,10 +4959,11 @@ void av1_encode_frame(AV1_COMP *cpi) { cm->comp_bwd_ref[0] = BWDREF_FRAME; cm->comp_bwd_ref[1] = ALTREF_FRAME; #else - cm->comp_fixed_ref = ALTREF_FRAME; - cm->comp_var_ref[0] = LAST_FRAME; - cm->comp_var_ref[1] = GOLDEN_FRAME; -#endif // CONFIG_EXT_REFS + cm->comp_fixed_ref = ALTREF_FRAME; + cm->comp_var_ref[0] = LAST_FRAME; + cm->comp_var_ref[1] = GOLDEN_FRAME; +#endif // CONFIG_EXT_REFS +#if !CONFIG_ONE_SIDED_COMPOUND // Normative in encoder } #endif } else { @@ -5529,6 +5014,9 @@ void av1_encode_frame(AV1_COMP *cpi) { cm->interp_filter = SWITCHABLE; #endif +#if CONFIG_EXT_INTER + make_consistent_compound_tools(cm); +#endif // CONFIG_EXT_INTER encode_frame_internal(cpi); for (i = 0; i < REFERENCE_MODES; ++i) @@ -5553,12 +5041,19 @@ void av1_encode_frame(AV1_COMP *cpi) { #endif // !CONFIG_REF_ADAPT } } +#if CONFIG_EXT_INTER + make_consistent_compound_tools(cm); +#endif // CONFIG_EXT_INTER #if CONFIG_VAR_TX if (cm->tx_mode == TX_MODE_SELECT && cpi->td.mb.txb_split_count == 0) cm->tx_mode = ALLOW_32X32 + CONFIG_TX64X64; #else +#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT + if (cm->tx_mode == TX_MODE_SELECT && counts->quarter_tx_size[1] == 0) { +#else if (cm->tx_mode == TX_MODE_SELECT) { +#endif #if CONFIG_TX64X64 int count4x4 = 0; int count8x8_8x8p = 0, count8x8_lp = 0; @@ -5566,41 +5061,50 @@ void av1_encode_frame(AV1_COMP *cpi) { int count32x32_32x32p = 0, count32x32_lp = 0; int count64x64_64x64p = 0; for (i = 0; i < TX_SIZE_CONTEXTS; ++i) { + int depth; // counts->tx_size[max_depth][context_idx][this_depth_level] - count4x4 += counts->tx_size[0][i][0]; - count4x4 += counts->tx_size[1][i][0]; - count4x4 += counts->tx_size[2][i][0]; - count4x4 += counts->tx_size[3][i][0]; - - count8x8_8x8p += counts->tx_size[0][i][1]; - count8x8_lp += counts->tx_size[1][i][1]; - count8x8_lp += counts->tx_size[2][i][1]; - count8x8_lp += counts->tx_size[3][i][1]; - - count16x16_16x16p += counts->tx_size[1][i][2]; - count16x16_lp += counts->tx_size[2][i][2]; - count16x16_lp += counts->tx_size[3][i][2]; - - count32x32_32x32p += counts->tx_size[2][i][3]; - count32x32_lp += counts->tx_size[3][i][3]; - - count64x64_64x64p += counts->tx_size[3][i][4]; + depth = tx_size_to_depth(TX_4X4); + count4x4 += counts->tx_size[TX_8X8 - TX_SIZE_CTX_MIN][i][depth]; + count4x4 += counts->tx_size[TX_16X16 - TX_SIZE_CTX_MIN][i][depth]; + count4x4 += counts->tx_size[TX_32X32 - TX_SIZE_CTX_MIN][i][depth]; + count4x4 += counts->tx_size[TX_64X64 - TX_SIZE_CTX_MIN][i][depth]; + + depth = tx_size_to_depth(TX_8X8); + count8x8_8x8p += counts->tx_size[TX_8X8 - TX_SIZE_CTX_MIN][i][depth]; + count8x8_lp += 
counts->tx_size[TX_16X16 - TX_SIZE_CTX_MIN][i][depth]; + count8x8_lp += counts->tx_size[TX_32X32 - TX_SIZE_CTX_MIN][i][depth]; + count8x8_lp += counts->tx_size[TX_64X64 - TX_SIZE_CTX_MIN][i][depth]; + + depth = tx_size_to_depth(TX_16X16); + count16x16_16x16p += + counts->tx_size[TX_16X16 - TX_SIZE_CTX_MIN][i][depth]; + count16x16_lp += counts->tx_size[TX_32X32 - TX_SIZE_CTX_MIN][i][depth]; + count16x16_lp += counts->tx_size[TX_64X64 - TX_SIZE_CTX_MIN][i][depth]; + + depth = tx_size_to_depth(TX_32X32); + count32x32_32x32p += + counts->tx_size[TX_32X32 - TX_SIZE_CTX_MIN][i][depth]; + count32x32_lp += counts->tx_size[TX_64X64 - TX_SIZE_CTX_MIN][i][depth]; + + depth = tx_size_to_depth(TX_64X64); + count64x64_64x64p += + counts->tx_size[TX_64X64 - TX_SIZE_CTX_MIN][i][depth]; } #if CONFIG_EXT_TX && CONFIG_RECT_TX - count4x4 += counts->tx_size_implied[0][TX_4X4]; - count4x4 += counts->tx_size_implied[1][TX_4X4]; - count4x4 += counts->tx_size_implied[2][TX_4X4]; - count4x4 += counts->tx_size_implied[3][TX_4X4]; - count8x8_8x8p += counts->tx_size_implied[1][TX_8X8]; - count8x8_lp += counts->tx_size_implied[2][TX_8X8]; - count8x8_lp += counts->tx_size_implied[3][TX_8X8]; - count8x8_lp += counts->tx_size_implied[4][TX_8X8]; - count16x16_16x16p += counts->tx_size_implied[2][TX_16X16]; - count16x16_lp += counts->tx_size_implied[3][TX_16X16]; - count16x16_lp += counts->tx_size_implied[4][TX_16X16]; - count32x32_32x32p += counts->tx_size_implied[3][TX_32X32]; - count32x32_lp += counts->tx_size_implied[4][TX_32X32]; - count64x64_64x64p += counts->tx_size_implied[4][TX_64X64]; + count4x4 += counts->tx_size_implied[TX_4X4][TX_4X4]; + count4x4 += counts->tx_size_implied[TX_8X8][TX_4X4]; + count4x4 += counts->tx_size_implied[TX_16X16][TX_4X4]; + count4x4 += counts->tx_size_implied[TX_32X32][TX_4X4]; + count8x8_8x8p += counts->tx_size_implied[TX_8X8][TX_8X8]; + count8x8_lp += counts->tx_size_implied[TX_16X16][TX_8X8]; + count8x8_lp += counts->tx_size_implied[TX_32X32][TX_8X8]; + count8x8_lp += counts->tx_size_implied[TX_64X64][TX_8X8]; + count16x16_16x16p += counts->tx_size_implied[TX_16X16][TX_16X16]; + count16x16_lp += counts->tx_size_implied[TX_32X32][TX_16X16]; + count16x16_lp += counts->tx_size_implied[TX_64X64][TX_16X16]; + count32x32_32x32p += counts->tx_size_implied[TX_32X32][TX_32X32]; + count32x32_lp += counts->tx_size_implied[TX_64X64][TX_32X32]; + count64x64_64x64p += counts->tx_size_implied[TX_64X64][TX_64X64]; #endif // CONFIG_EXT_TX && CONFIG_RECT_TX if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 && count32x32_lp == 0 && count32x32_32x32p == 0 && @@ -5652,30 +5156,37 @@ void av1_encode_frame(AV1_COMP *cpi) { int count16x16_16x16p = 0, count16x16_lp = 0; int count32x32 = 0; for (i = 0; i < TX_SIZE_CONTEXTS; ++i) { + int depth; // counts->tx_size[max_depth][context_idx][this_depth_level] - count4x4 += counts->tx_size[0][i][0]; - count4x4 += counts->tx_size[1][i][0]; - count4x4 += counts->tx_size[2][i][0]; - - count8x8_8x8p += counts->tx_size[0][i][1]; - count8x8_lp += counts->tx_size[1][i][1]; - count8x8_lp += counts->tx_size[2][i][1]; - - count16x16_16x16p += counts->tx_size[1][i][2]; - count16x16_lp += counts->tx_size[2][i][2]; - count32x32 += counts->tx_size[2][i][3]; + depth = tx_size_to_depth(TX_4X4); + count4x4 += counts->tx_size[TX_8X8 - TX_SIZE_CTX_MIN][i][depth]; + count4x4 += counts->tx_size[TX_16X16 - TX_SIZE_CTX_MIN][i][depth]; + count4x4 += counts->tx_size[TX_32X32 - TX_SIZE_CTX_MIN][i][depth]; + + depth = tx_size_to_depth(TX_8X8); + count8x8_8x8p += 
counts->tx_size[TX_8X8 - TX_SIZE_CTX_MIN][i][depth]; + count8x8_lp += counts->tx_size[TX_16X16 - TX_SIZE_CTX_MIN][i][depth]; + count8x8_lp += counts->tx_size[TX_32X32 - TX_SIZE_CTX_MIN][i][depth]; + + depth = tx_size_to_depth(TX_16X16); + count16x16_16x16p += + counts->tx_size[TX_16X16 - TX_SIZE_CTX_MIN][i][depth]; + count16x16_lp += counts->tx_size[TX_32X32 - TX_SIZE_CTX_MIN][i][depth]; + + depth = tx_size_to_depth(TX_32X32); + count32x32 += counts->tx_size[TX_32X32 - TX_SIZE_CTX_MIN][i][depth]; } #if CONFIG_EXT_TX && CONFIG_RECT_TX - count4x4 += counts->tx_size_implied[0][TX_4X4]; - count4x4 += counts->tx_size_implied[1][TX_4X4]; - count4x4 += counts->tx_size_implied[2][TX_4X4]; - count4x4 += counts->tx_size_implied[3][TX_4X4]; - count8x8_8x8p += counts->tx_size_implied[1][TX_8X8]; - count8x8_lp += counts->tx_size_implied[2][TX_8X8]; - count8x8_lp += counts->tx_size_implied[3][TX_8X8]; - count16x16_lp += counts->tx_size_implied[3][TX_16X16]; - count16x16_16x16p += counts->tx_size_implied[2][TX_16X16]; - count32x32 += counts->tx_size_implied[3][TX_32X32]; + count4x4 += counts->tx_size_implied[TX_4X4][TX_4X4]; + count4x4 += counts->tx_size_implied[TX_8X8][TX_4X4]; + count4x4 += counts->tx_size_implied[TX_16X16][TX_4X4]; + count4x4 += counts->tx_size_implied[TX_32X32][TX_4X4]; + count8x8_8x8p += counts->tx_size_implied[TX_8X8][TX_8X8]; + count8x8_lp += counts->tx_size_implied[TX_16X16][TX_8X8]; + count8x8_lp += counts->tx_size_implied[TX_32X32][TX_8X8]; + count16x16_16x16p += counts->tx_size_implied[TX_16X16][TX_16X16]; + count16x16_lp += counts->tx_size_implied[TX_32X32][TX_16X16]; + count32x32 += counts->tx_size_implied[TX_32X32][TX_32X32]; #endif // CONFIG_EXT_TX && CONFIG_RECT_TX if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 && #if CONFIG_SUPERTX @@ -5709,6 +5220,9 @@ void av1_encode_frame(AV1_COMP *cpi) { } #endif } else { +#if CONFIG_EXT_INTER + make_consistent_compound_tools(cm); +#endif // CONFIG_EXT_INTER encode_frame_internal(cpi); } } @@ -5758,6 +5272,11 @@ static void sum_intra_stats(FRAME_COUNTS *counts, MACROBLOCKD *xd, ++counts->filter_intra[0][use_filter_intra_mode]; } if (mbmi->uv_mode == DC_PRED +#if CONFIG_CB4X4 + && + is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x, + xd->plane[1].subsampling_y) +#endif #if CONFIG_PALETTE && mbmi->palette_mode_info.palette_size[1] == 0 #endif // CONFIG_PALETTE @@ -5799,8 +5318,8 @@ static void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd, const int tx_col = blk_col >> 1; const int max_blocks_high = max_block_high(xd, mbmi->sb_type, 0); const int max_blocks_wide = max_block_wide(xd, mbmi->sb_type, 0); - int ctx = txfm_partition_context(xd->above_txfm_context + tx_col, - xd->left_txfm_context + tx_row, + int ctx = txfm_partition_context(xd->above_txfm_context + blk_col, + xd->left_txfm_context + blk_row, mbmi->sb_type, tx_size); const TX_SIZE plane_tx_size = mbmi->inter_tx_size[tx_row][tx_col]; @@ -5809,8 +5328,8 @@ static void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd, if (tx_size == plane_tx_size) { ++counts->txfm_partition[ctx][0]; mbmi->tx_size = tx_size; - txfm_partition_update(xd->above_txfm_context + tx_col, - xd->left_txfm_context + tx_row, tx_size, tx_size); + txfm_partition_update(xd->above_txfm_context + blk_col, + xd->left_txfm_context + blk_row, tx_size, tx_size); } else { const TX_SIZE sub_txs = sub_tx_size_map[tx_size]; const int bs = tx_size_wide_unit[sub_txs]; @@ -5822,8 +5341,8 @@ static void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd, if (tx_size == TX_8X8) { 
mbmi->inter_tx_size[tx_row][tx_col] = TX_4X4; mbmi->tx_size = TX_4X4; - txfm_partition_update(xd->above_txfm_context + tx_col, - xd->left_txfm_context + tx_row, TX_4X4, tx_size); + txfm_partition_update(xd->above_txfm_context + blk_col, + xd->left_txfm_context + blk_row, TX_4X4, tx_size); return; } @@ -5847,9 +5366,10 @@ static void tx_partition_count_update(const AV1_COMMON *const cm, MACROBLOCK *x, const int bw = tx_size_wide_unit[max_tx_size]; int idx, idy; - xd->above_txfm_context = cm->above_txfm_context + mi_col; - xd->left_txfm_context = - xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); + xd->above_txfm_context = + cm->above_txfm_context + (mi_col << TX_UNIT_WIDE_LOG2); + xd->left_txfm_context = xd->left_txfm_context_buffer + + ((mi_row & MAX_MIB_MASK) << TX_UNIT_HIGH_LOG2); for (idy = 0; idy < mi_height; idy += bh) for (idx = 0; idx < mi_width; idx += bw) @@ -5870,8 +5390,8 @@ static void set_txfm_context(MACROBLOCKD *xd, TX_SIZE tx_size, int blk_row, if (tx_size == plane_tx_size) { mbmi->tx_size = tx_size; - txfm_partition_update(xd->above_txfm_context + tx_col, - xd->left_txfm_context + tx_row, tx_size, tx_size); + txfm_partition_update(xd->above_txfm_context + blk_col, + xd->left_txfm_context + blk_row, tx_size, tx_size); } else { const TX_SIZE sub_txs = sub_tx_size_map[tx_size]; @@ -5881,8 +5401,8 @@ static void set_txfm_context(MACROBLOCKD *xd, TX_SIZE tx_size, int blk_row, if (tx_size == TX_8X8) { mbmi->inter_tx_size[tx_row][tx_col] = TX_4X4; mbmi->tx_size = TX_4X4; - txfm_partition_update(xd->above_txfm_context + tx_col, - xd->left_txfm_context + tx_row, TX_4X4, tx_size); + txfm_partition_update(xd->above_txfm_context + blk_col, + xd->left_txfm_context + blk_row, TX_4X4, tx_size); return; } @@ -5905,9 +5425,10 @@ static void tx_partition_set_contexts(const AV1_COMMON *const cm, const int bw = tx_size_wide_unit[max_tx_size]; int idx, idy; - xd->above_txfm_context = cm->above_txfm_context + mi_col; - xd->left_txfm_context = - xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); + xd->above_txfm_context = + cm->above_txfm_context + (mi_col << TX_UNIT_WIDE_LOG2); + xd->left_txfm_context = xd->left_txfm_context_buffer + + ((mi_row & MAX_MIB_MASK) << TX_UNIT_HIGH_LOG2); for (idy = 0; idy < mi_height; idy += bh) for (idx = 0; idx < mi_width; idx += bw) @@ -5964,8 +5485,7 @@ void av1_update_tx_type_count(const AV1_COMMON *cm, MACROBLOCKD *xd, static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run, int mi_row, - int mi_col, BLOCK_SIZE bsize, - PICK_MODE_CONTEXT *ctx, int *rate) { + int mi_col, BLOCK_SIZE bsize, int *rate) { const AV1_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; @@ -6039,10 +5559,9 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td, av1_setup_pre_planes(xd, ref, cfg, mi_row, mi_col, &xd->block_refs[ref]->sf); } - if (!(cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready) || seg_skip) - av1_build_inter_predictors_sby(xd, mi_row, mi_col, NULL, block_size); + av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, NULL, block_size); - av1_build_inter_predictors_sbuv(xd, mi_row, mi_col, NULL, block_size); + av1_build_inter_predictors_sbuv(cm, xd, mi_row, mi_col, NULL, block_size); #if CONFIG_MOTION_VAR if (mbmi->motion_mode == OBMC_CAUSAL) { #if CONFIG_NCOBMC @@ -6068,6 +5587,13 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td, #endif } +#if CONFIG_DAALA_DIST && CONFIG_CB4X4 + if (bsize < BLOCK_8X8) { + 
daala_dist_set_sub8x8_dst(x, x->decoded_8x8, bsize, block_size_wide[bsize], + block_size_high[bsize], mi_row, mi_col); + } +#endif + if (!dry_run) { #if CONFIG_VAR_TX TX_SIZE tx_size = @@ -6092,7 +5618,7 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td, const TX_SIZE coded_tx_size = txsize_sqr_up_map[tx_size]; const int depth = tx_size_to_depth(coded_tx_size); ++td->counts->tx_size[tx_size_cat][tx_size_ctx][depth]; - if (tx_size != max_txsize_lookup[bsize]) ++x->txb_split_count; + if (tx_size != max_txsize_rect_lookup[bsize]) ++x->txb_split_count; } #else const int tx_size_ctx = get_tx_size_context(xd); @@ -6103,6 +5629,13 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td, ++td->counts->tx_size[tx_size_cat][tx_size_ctx][depth]; #endif +#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT + if (is_quarter_tx_allowed(xd, mbmi, is_inter) && + mbmi->tx_size != txsize_sqr_up_map[mbmi->tx_size]) { + ++td->counts->quarter_tx_size[mbmi->tx_size == + quarter_txsize_lookup[mbmi->sb_type]]; + } +#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT #if CONFIG_EXT_TX && CONFIG_RECT_TX assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed(xd, mbmi))); #endif // CONFIG_EXT_TX && CONFIG_RECT_TX @@ -6135,7 +5668,7 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td, #if CONFIG_VAR_TX mbmi->min_tx_size = get_min_tx_size(intra_tx_size); - if (intra_tx_size != max_txsize_lookup[bsize]) ++x->txb_split_count; + if (intra_tx_size != max_txsize_rect_lookup[bsize]) ++x->txb_split_count; #endif } @@ -6327,13 +5860,13 @@ static void predict_superblock(const AV1_COMP *const cpi, ThreadData *td, } if (!b_sub8x8) - av1_build_inter_predictors_sb_extend(xd, + av1_build_inter_predictors_sb_extend(cm, xd, #if CONFIG_EXT_INTER mi_row_ori, mi_col_ori, #endif // CONFIG_EXT_INTER mi_row_pred, mi_col_pred, bsize_pred); else - av1_build_inter_predictors_sb_sub8x8_extend(xd, + av1_build_inter_predictors_sb_sub8x8_extend(cm, xd, #if CONFIG_EXT_INTER mi_row_ori, mi_col_ori, #endif // CONFIG_EXT_INTER diff --git a/third_party/aom/av1/encoder/encodeframe.h b/third_party/aom/av1/encoder/encodeframe.h index 08d6d20de..46a99e1cf 100644 --- a/third_party/aom/av1/encoder/encodeframe.h +++ b/third_party/aom/av1/encoder/encodeframe.h @@ -25,13 +25,6 @@ struct yv12_buffer_config; struct AV1_COMP; struct ThreadData; -// Constants used in SOURCE_VAR_BASED_PARTITION -#define VAR_HIST_MAX_BG_VAR 1000 -#define VAR_HIST_FACTOR 10 -#define VAR_HIST_BINS (VAR_HIST_MAX_BG_VAR / VAR_HIST_FACTOR + 1) -#define VAR_HIST_LARGE_CUT_OFF 75 -#define VAR_HIST_SMALL_CUT_OFF 45 - void av1_setup_src_planes(struct macroblock *x, const struct yv12_buffer_config *src, int mi_row, int mi_col); @@ -42,8 +35,6 @@ void av1_init_tile_data(struct AV1_COMP *cpi); void av1_encode_tile(struct AV1_COMP *cpi, struct ThreadData *td, int tile_row, int tile_col); -void av1_set_variance_partition_thresholds(struct AV1_COMP *cpi, int q); - void av1_update_tx_type_count(const struct AV1Common *cm, MACROBLOCKD *xd, #if CONFIG_TXK_SEL int block, int plane, diff --git a/third_party/aom/av1/encoder/encodemb.c b/third_party/aom/av1/encoder/encodemb.c index c450244b1..7c9781533 100644 --- a/third_party/aom/av1/encoder/encodemb.c +++ b/third_party/aom/av1/encoder/encodemb.c @@ -115,7 +115,7 @@ static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = { #if CONFIG_EC_ADAPT { 10, 7 }, { 8, 5 }, #else - { 10, 6 }, { 8, 5 }, + { 10, 6 }, { 8, 6 }, #endif }; @@ -125,35 +125,31 @@ static const int 
plane_rd_mult[REF_TYPES][PLANE_TYPES] = { rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1); \ } -static INLINE int64_t -get_token_bit_costs(unsigned int token_costs[2][COEFF_CONTEXTS][ENTROPY_TOKENS], - int skip_eob, int ctx, int token) { -#if CONFIG_NEW_TOKENSET +static INLINE unsigned int get_token_bit_costs( + unsigned int token_costs[2][COEFF_CONTEXTS][ENTROPY_TOKENS], int skip_eob, + int ctx, int token) { (void)skip_eob; return token_costs[token == ZERO_TOKEN || token == EOB_TOKEN][ctx][token]; -#else - return token_costs[skip_eob][ctx][token]; -#endif } +#if !CONFIG_LV_MAP #define USE_GREEDY_OPTIMIZE_B 0 #if USE_GREEDY_OPTIMIZE_B -typedef struct av1_token_state { +typedef struct av1_token_state_greedy { int16_t token; tran_low_t qc; tran_low_t dqc; -} av1_token_state; +} av1_token_state_greedy; -int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block, - TX_SIZE tx_size, int ctx) { -#if !CONFIG_PVQ +static int optimize_b_greedy(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, + int block, TX_SIZE tx_size, int ctx) { MACROBLOCKD *const xd = &mb->e_mbd; struct macroblock_plane *const p = &mb->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; const int ref = is_inter_block(&xd->mi[0]->mbmi); - av1_token_state tokens[MAX_TX_SQUARE + 1][2]; + av1_token_state_greedy tokens[MAX_TX_SQUARE + 1][2]; uint8_t token_cache[MAX_TX_SQUARE]; const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); @@ -176,38 +172,23 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block, #if CONFIG_NEW_QUANT int dq = get_dq_profile_from_ctx(mb->qindex, ctx, ref, plane_type); const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq[dq]; -#elif !CONFIG_AOM_QM - const int dq_step[2] = { dequant_ptr[0] >> shift, dequant_ptr[1] >> shift }; #endif // CONFIG_NEW_QUANT int sz = 0; const int64_t rddiv = mb->rddiv; int64_t rd_cost0, rd_cost1; int16_t t0, t1; int i, final_eob; -#if CONFIG_HIGHBITDEPTH const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, xd->bd); -#else - const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, 8); -#endif unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] = mb->token_costs[txsize_sqr_map[tx_size]][plane_type][ref]; const int default_eob = tx_size_2d[tx_size]; - assert((mb->qindex == 0) ^ (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0)); + assert(mb->qindex > 0); assert((!plane_type && !plane) || (plane_type && plane)); assert(eob <= default_eob); int64_t rdmult = (mb->rdmult * plane_rd_mult[ref][plane_type]) >> 1; -/* CpuSpeedTest uses "--min-q=0 --max-q=0" and expects 100dB psnr -* This creates conflict with search for a better EOB position -* The line below is to make sure EOB search is disabled at this corner case. -*/ -#if !CONFIG_NEW_QUANT && !CONFIG_AOM_QM - if (dq_step[1] <= 4) { - rdmult = 1; - } -#endif int64_t rate0, rate1; for (i = 0; i < eob; i++) { @@ -402,22 +383,10 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block, dqc_a = shift ? ROUND_POWER_OF_TWO(dqc_a, shift) : dqc_a; if (sz) dqc_a = -dqc_a; #else -// The 32x32 transform coefficient uses half quantization step size. -// Account for the rounding difference in the dequantized coefficeint -// value when the quantization index is dropped from an even number -// to an odd number. 
- -#if CONFIG_AOM_QM - tran_low_t offset = dqv >> shift; -#else - tran_low_t offset = dq_step[rc != 0]; -#endif - if (shift & x_a) offset += (dqv & 0x01); - - if (sz == 0) - dqc_a = dqcoeff[rc] - offset; + if (x_a < 0) + dqc_a = -((-x_a * dqv) >> shift); else - dqc_a = dqcoeff[rc] + offset; + dqc_a = (x_a * dqv) >> shift; #endif // CONFIG_NEW_QUANT } else { dqc_a = 0; @@ -483,19 +452,11 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block, mb->plane[plane].eobs[block] = final_eob; return final_eob; - -#else // !CONFIG_PVQ - (void)cm; - (void)tx_size; - (void)ctx; - struct macroblock_plane *const p = &mb->plane[plane]; - return p->eobs[block]; -#endif // !CONFIG_PVQ } #else // USE_GREEDY_OPTIMIZE_B -typedef struct av1_token_state { +typedef struct av1_token_state_org { int64_t error; int rate; int16_t next; @@ -503,16 +464,15 @@ typedef struct av1_token_state { tran_low_t qc; tran_low_t dqc; uint8_t best_index; -} av1_token_state; +} av1_token_state_org; -int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block, - TX_SIZE tx_size, int ctx) { -#if !CONFIG_PVQ +static int optimize_b_org(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, + int block, TX_SIZE tx_size, int ctx) { MACROBLOCKD *const xd = &mb->e_mbd; struct macroblock_plane *const p = &mb->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; const int ref = is_inter_block(&xd->mi[0]->mbmi); - av1_token_state tokens[MAX_TX_SQUARE + 1][2]; + av1_token_state_org tokens[MAX_TX_SQUARE + 1][2]; uint8_t token_cache[MAX_TX_SQUARE]; const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); @@ -536,8 +496,6 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block, #if CONFIG_NEW_QUANT int dq = get_dq_profile_from_ctx(mb->qindex, ctx, ref, plane_type); const dequant_val_type_nuq *dequant_val = pd->dequant_val_nuq[dq]; -#elif !CONFIG_AOM_QM - const int dq_step[2] = { dequant_ptr[0] >> shift, dequant_ptr[1] >> shift }; #endif // CONFIG_NEW_QUANT int next = eob, sz = 0; const int64_t rdmult = (mb->rdmult * plane_rd_mult[ref][plane_type]) >> 1; @@ -549,11 +507,7 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block, int best, band = (eob < default_eob) ? band_translate[eob] : band_translate[eob - 1]; int pt, i, final_eob; -#if CONFIG_HIGHBITDEPTH const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, xd->bd); -#else - const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, 8); -#endif unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] = mb->token_costs[txsize_sqr_map[tx_size]][plane_type][ref]; const uint16_t *band_counts = &band_count_table[tx_size][band]; @@ -566,11 +520,10 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block, ? av1_get_qindex(&cm->seg, xd->mi[0]->mbmi.segment_id, cm->base_qindex) : cm->base_qindex; - if (qindex == 0) { - assert((qindex == 0) ^ (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0)); - } + assert(qindex > 0); + (void)qindex; #else - assert((mb->qindex == 0) ^ (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0)); + assert(mb->qindex > 0); #endif token_costs += band; @@ -777,22 +730,10 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block, : tokens[i][1].dqc; if (sz) tokens[i][1].dqc = -tokens[i][1].dqc; #else -// The 32x32 transform coefficient uses half quantization step size. 
-// Account for the rounding difference in the dequantized coefficeint -// value when the quantization index is dropped from an even number -// to an odd number. - -#if CONFIG_AOM_QM - tran_low_t offset = dqv >> shift; -#else - tran_low_t offset = dq_step[rc != 0]; -#endif - if (shift & x) offset += (dqv & 0x01); - - if (sz == 0) - tokens[i][1].dqc = dqcoeff[rc] - offset; + if (x < 0) + tokens[i][1].dqc = -((-x * dqv) >> shift); else - tokens[i][1].dqc = dqcoeff[rc] + offset; + tokens[i][1].dqc = (x * dqv) >> shift; #endif // CONFIG_NEW_QUANT } else { tokens[i][1].dqc = 0; @@ -858,16 +799,47 @@ int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block, mb->plane[plane].eobs[block] = final_eob; assert(final_eob <= default_eob); return final_eob; -#else // !CONFIG_PVQ +} + +#endif // USE_GREEDY_OPTIMIZE_B +#endif // !CONFIG_LV_MAP + +int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, + const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l) { + MACROBLOCKD *const xd = &mb->e_mbd; + struct macroblock_plane *const p = &mb->plane[plane]; + const int eob = p->eobs[block]; + assert((mb->qindex == 0) ^ (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0)); + if (eob == 0) return eob; + if (xd->lossless[xd->mi[0]->mbmi.segment_id]) return eob; +#if CONFIG_PVQ (void)cm; (void)tx_size; - (void)ctx; - struct macroblock_plane *const p = &mb->plane[plane]; - return p->eobs[block]; -#endif // !CONFIG_PVQ -} + (void)a; + (void)l; + return eob; +#endif + +#if !CONFIG_LV_MAP + (void)plane_bsize; +#if CONFIG_VAR_TX + int ctx = get_entropy_context(tx_size, a, l); +#else + int ctx = combine_entropy_contexts(*a, *l); +#endif +#if USE_GREEDY_OPTIMIZE_B + return optimize_b_greedy(cm, mb, plane, block, tx_size, ctx); +#else // USE_GREEDY_OPTIMIZE_B + return optimize_b_org(cm, mb, plane, block, tx_size, ctx); #endif // USE_GREEDY_OPTIMIZE_B +#else // !CONFIG_LV_MAP + TXB_CTX txb_ctx; + get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx); + return av1_optimize_txb(cm, mb, plane, block, tx_size, &txb_ctx); +#endif // !CONFIG_LV_MAP +} #if !CONFIG_PVQ #if CONFIG_HIGHBITDEPTH @@ -1158,8 +1130,7 @@ static void encode_block(int plane, int block, int blk_row, int blk_col, #endif #if !CONFIG_PVQ - if (p->eobs[block] && !xd->lossless[xd->mi[0]->mbmi.segment_id]) - av1_optimize_b(cm, x, plane, block, tx_size, ctx); + av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l); av1_set_txb_context(x, plane, block, tx_size, a, l); @@ -1202,12 +1173,13 @@ static void encode_block_inter(int plane, int block, int blk_row, int blk_col, if (tx_size == plane_tx_size) { encode_block(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg); } else { + assert(tx_size < TX_SIZES_ALL); const TX_SIZE sub_txs = sub_tx_size_map[tx_size]; + assert(sub_txs < tx_size); // This is the square transform block partition entry point. 
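
/*
 * With the refactor above, av1_optimize_b() no longer relies on its callers
 * to filter out empty or lossless blocks; it bails out itself and then routes
 * to the PVQ, LV_MAP (av1_optimize_txb) or trellis (greedy / original)
 * implementation. The decision order is flattened below into runtime flags
 * purely to illustrate the compile-time #if structure; the enum and the
 * function are illustrative only.
 */
enum optimize_path_sketch {
  PATH_NONE,    /* eob == 0 or lossless segment: leave coefficients as-is */
  PATH_PVQ,     /* CONFIG_PVQ: PVQ already finalized the coefficients */
  PATH_TXB,     /* CONFIG_LV_MAP: av1_optimize_txb() with a TXB context */
  PATH_GREEDY,  /* USE_GREEDY_OPTIMIZE_B: optimize_b_greedy() */
  PATH_TRELLIS  /* default: optimize_b_org() */
};

static enum optimize_path_sketch choose_optimize_path(int eob, int lossless,
                                                      int cfg_pvq,
                                                      int cfg_lv_map,
                                                      int use_greedy) {
  if (eob == 0 || lossless) return PATH_NONE;
  if (cfg_pvq) return PATH_PVQ;
  if (cfg_lv_map) return PATH_TXB;
  return use_greedy ? PATH_GREEDY : PATH_TRELLIS;
}
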
int bsl = tx_size_wide_unit[sub_txs]; int i; assert(bsl > 0); - assert(tx_size < TX_SIZES_ALL); for (i = 0; i < 4; ++i) { const int offsetr = blk_row + ((i >> 1) * bsl); @@ -1301,8 +1273,8 @@ void av1_encode_sby_pass1(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize) { encode_block_pass1, &args); } -void av1_encode_sb(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize, - const int mi_row, const int mi_col) { +void av1_encode_sb(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, + int mi_col) { MACROBLOCKD *const xd = &x->e_mbd; struct optimize_ctx ctx; MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; @@ -1433,6 +1405,301 @@ static void encode_block_intra_and_set_context(int plane, int block, #endif } +#if CONFIG_DPCM_INTRA +static int get_eob(const tran_low_t *qcoeff, intptr_t n_coeffs, + const int16_t *scan) { + int eob = -1; + for (int i = (int)n_coeffs - 1; i >= 0; i--) { + const int rc = scan[i]; + if (qcoeff[rc]) { + eob = i; + break; + } + } + return eob + 1; +} + +static void quantize_scaler(int coeff, int16_t zbin, int16_t round_value, + int16_t quant, int16_t quant_shift, int16_t dequant, + int log_scale, tran_low_t *const qcoeff, + tran_low_t *const dqcoeff) { + zbin = ROUND_POWER_OF_TWO(zbin, log_scale); + round_value = ROUND_POWER_OF_TWO(round_value, log_scale); + const int coeff_sign = (coeff >> 31); + const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; + if (abs_coeff >= zbin) { + int tmp = clamp(abs_coeff + round_value, INT16_MIN, INT16_MAX); + tmp = ((((tmp * quant) >> 16) + tmp) * quant_shift) >> (16 - log_scale); + *qcoeff = (tmp ^ coeff_sign) - coeff_sign; + *dqcoeff = (*qcoeff * dequant) / (1 << log_scale); + } +} + +typedef void (*dpcm_fwd_tx_func)(const int16_t *input, int stride, + TX_TYPE_1D tx_type, tran_low_t *output); + +static dpcm_fwd_tx_func get_dpcm_fwd_tx_func(int tx_length) { + switch (tx_length) { + case 4: return av1_dpcm_ft4_c; + case 8: return av1_dpcm_ft8_c; + case 16: return av1_dpcm_ft16_c; + case 32: + return av1_dpcm_ft32_c; + // TODO(huisu): add support for TX_64X64. + default: assert(0); return NULL; + } +} + +static void process_block_dpcm_vert(TX_SIZE tx_size, TX_TYPE_1D tx_type_1d, + struct macroblockd_plane *const pd, + struct macroblock_plane *const p, + uint8_t *src, int src_stride, uint8_t *dst, + int dst_stride, int16_t *src_diff, + int diff_stride, tran_low_t *coeff, + tran_low_t *qcoeff, tran_low_t *dqcoeff) { + const int tx1d_width = tx_size_wide[tx_size]; + dpcm_fwd_tx_func forward_tx = get_dpcm_fwd_tx_func(tx1d_width); + dpcm_inv_txfm_add_func inverse_tx = + av1_get_dpcm_inv_txfm_add_func(tx1d_width); + const int tx1d_height = tx_size_high[tx_size]; + const int log_scale = av1_get_tx_scale(tx_size); + int q_idx = 0; + for (int r = 0; r < tx1d_height; ++r) { + // Update prediction. + if (r > 0) memcpy(dst, dst - dst_stride, tx1d_width * sizeof(dst[0])); + // Subtraction. + for (int c = 0; c < tx1d_width; ++c) src_diff[c] = src[c] - dst[c]; + // Forward transform. + forward_tx(src_diff, 1, tx_type_1d, coeff); + // Quantization. + for (int c = 0; c < tx1d_width; ++c) { + quantize_scaler(coeff[c], p->zbin[q_idx], p->round[q_idx], + p->quant[q_idx], p->quant_shift[q_idx], + pd->dequant[q_idx], log_scale, &qcoeff[c], &dqcoeff[c]); + q_idx = 1; + } + // Inverse transform. + inverse_tx(dqcoeff, 1, tx_type_1d, dst); + // Move to the next row. 
+ coeff += tx1d_width; + qcoeff += tx1d_width; + dqcoeff += tx1d_width; + src_diff += diff_stride; + dst += dst_stride; + src += src_stride; + } +} + +static void process_block_dpcm_horz(TX_SIZE tx_size, TX_TYPE_1D tx_type_1d, + struct macroblockd_plane *const pd, + struct macroblock_plane *const p, + uint8_t *src, int src_stride, uint8_t *dst, + int dst_stride, int16_t *src_diff, + int diff_stride, tran_low_t *coeff, + tran_low_t *qcoeff, tran_low_t *dqcoeff) { + const int tx1d_height = tx_size_high[tx_size]; + dpcm_fwd_tx_func forward_tx = get_dpcm_fwd_tx_func(tx1d_height); + dpcm_inv_txfm_add_func inverse_tx = + av1_get_dpcm_inv_txfm_add_func(tx1d_height); + const int tx1d_width = tx_size_wide[tx_size]; + const int log_scale = av1_get_tx_scale(tx_size); + int q_idx = 0; + for (int c = 0; c < tx1d_width; ++c) { + for (int r = 0; r < tx1d_height; ++r) { + // Update prediction. + if (c > 0) dst[r * dst_stride] = dst[r * dst_stride - 1]; + // Subtraction. + src_diff[r * diff_stride] = src[r * src_stride] - dst[r * dst_stride]; + } + // Forward transform. + tran_low_t tx_buff[64]; + forward_tx(src_diff, diff_stride, tx_type_1d, tx_buff); + for (int r = 0; r < tx1d_height; ++r) coeff[r * tx1d_width] = tx_buff[r]; + // Quantization. + for (int r = 0; r < tx1d_height; ++r) { + quantize_scaler(coeff[r * tx1d_width], p->zbin[q_idx], p->round[q_idx], + p->quant[q_idx], p->quant_shift[q_idx], + pd->dequant[q_idx], log_scale, &qcoeff[r * tx1d_width], + &dqcoeff[r * tx1d_width]); + q_idx = 1; + } + // Inverse transform. + for (int r = 0; r < tx1d_height; ++r) tx_buff[r] = dqcoeff[r * tx1d_width]; + inverse_tx(tx_buff, dst_stride, tx_type_1d, dst); + // Move to the next column. + ++coeff, ++qcoeff, ++dqcoeff, ++src_diff, ++dst, ++src; + } +} + +#if CONFIG_HIGHBITDEPTH +static void hbd_process_block_dpcm_vert( + TX_SIZE tx_size, TX_TYPE_1D tx_type_1d, int bd, + struct macroblockd_plane *const pd, struct macroblock_plane *const p, + uint8_t *src8, int src_stride, uint8_t *dst8, int dst_stride, + int16_t *src_diff, int diff_stride, tran_low_t *coeff, tran_low_t *qcoeff, + tran_low_t *dqcoeff) { + const int tx1d_width = tx_size_wide[tx_size]; + dpcm_fwd_tx_func forward_tx = get_dpcm_fwd_tx_func(tx1d_width); + hbd_dpcm_inv_txfm_add_func inverse_tx = + av1_get_hbd_dpcm_inv_txfm_add_func(tx1d_width); + uint16_t *src = CONVERT_TO_SHORTPTR(src8); + uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); + const int tx1d_height = tx_size_high[tx_size]; + const int log_scale = av1_get_tx_scale(tx_size); + int q_idx = 0; + for (int r = 0; r < tx1d_height; ++r) { + // Update prediction. + if (r > 0) memcpy(dst, dst - dst_stride, tx1d_width * sizeof(dst[0])); + // Subtraction. + for (int c = 0; c < tx1d_width; ++c) src_diff[c] = src[c] - dst[c]; + // Forward transform. + forward_tx(src_diff, 1, tx_type_1d, coeff); + // Quantization. + for (int c = 0; c < tx1d_width; ++c) { + quantize_scaler(coeff[c], p->zbin[q_idx], p->round[q_idx], + p->quant[q_idx], p->quant_shift[q_idx], + pd->dequant[q_idx], log_scale, &qcoeff[c], &dqcoeff[c]); + q_idx = 1; + } + // Inverse transform. + inverse_tx(dqcoeff, 1, tx_type_1d, bd, dst); + // Move to the next row. 
+ coeff += tx1d_width; + qcoeff += tx1d_width; + dqcoeff += tx1d_width; + src_diff += diff_stride; + dst += dst_stride; + src += src_stride; + } +} + +static void hbd_process_block_dpcm_horz( + TX_SIZE tx_size, TX_TYPE_1D tx_type_1d, int bd, + struct macroblockd_plane *const pd, struct macroblock_plane *const p, + uint8_t *src8, int src_stride, uint8_t *dst8, int dst_stride, + int16_t *src_diff, int diff_stride, tran_low_t *coeff, tran_low_t *qcoeff, + tran_low_t *dqcoeff) { + const int tx1d_height = tx_size_high[tx_size]; + dpcm_fwd_tx_func forward_tx = get_dpcm_fwd_tx_func(tx1d_height); + hbd_dpcm_inv_txfm_add_func inverse_tx = + av1_get_hbd_dpcm_inv_txfm_add_func(tx1d_height); + uint16_t *src = CONVERT_TO_SHORTPTR(src8); + uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); + const int tx1d_width = tx_size_wide[tx_size]; + const int log_scale = av1_get_tx_scale(tx_size); + int q_idx = 0; + for (int c = 0; c < tx1d_width; ++c) { + for (int r = 0; r < tx1d_height; ++r) { + // Update prediction. + if (c > 0) dst[r * dst_stride] = dst[r * dst_stride - 1]; + // Subtraction. + src_diff[r * diff_stride] = src[r * src_stride] - dst[r * dst_stride]; + } + // Forward transform. + tran_low_t tx_buff[64]; + forward_tx(src_diff, diff_stride, tx_type_1d, tx_buff); + for (int r = 0; r < tx1d_height; ++r) coeff[r * tx1d_width] = tx_buff[r]; + // Quantization. + for (int r = 0; r < tx1d_height; ++r) { + quantize_scaler(coeff[r * tx1d_width], p->zbin[q_idx], p->round[q_idx], + p->quant[q_idx], p->quant_shift[q_idx], + pd->dequant[q_idx], log_scale, &qcoeff[r * tx1d_width], + &dqcoeff[r * tx1d_width]); + q_idx = 1; + } + // Inverse transform. + for (int r = 0; r < tx1d_height; ++r) tx_buff[r] = dqcoeff[r * tx1d_width]; + inverse_tx(tx_buff, dst_stride, tx_type_1d, bd, dst); + // Move to the next column. 
+ ++coeff, ++qcoeff, ++dqcoeff, ++src_diff, ++dst, ++src; + } +} +#endif // CONFIG_HIGHBITDEPTH + +void av1_encode_block_intra_dpcm(const AV1_COMMON *cm, MACROBLOCK *x, + PREDICTION_MODE mode, int plane, int block, + int blk_row, int blk_col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, + TX_TYPE tx_type, ENTROPY_CONTEXT *ta, + ENTROPY_CONTEXT *tl, int8_t *skip) { + MACROBLOCKD *const xd = &x->e_mbd; + struct macroblock_plane *const p = &x->plane[plane]; + struct macroblockd_plane *const pd = &xd->plane[plane]; + tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); + const int diff_stride = block_size_wide[plane_bsize]; + const int src_stride = p->src.stride; + const int dst_stride = pd->dst.stride; + const int tx1d_width = tx_size_wide[tx_size]; + const int tx1d_height = tx_size_high[tx_size]; + const SCAN_ORDER *const scan_order = get_scan(cm, tx_size, tx_type, 0); + tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block); + tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); + uint8_t *dst = + &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]]; + uint8_t *src = + &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]]; + int16_t *src_diff = + &p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]]; + uint16_t *eob = &p->eobs[block]; + *eob = 0; + memset(qcoeff, 0, tx1d_height * tx1d_width * sizeof(*qcoeff)); + memset(dqcoeff, 0, tx1d_height * tx1d_width * sizeof(*dqcoeff)); + + if (LIKELY(!x->skip_block)) { + TX_TYPE_1D tx_type_1d = DCT_1D; + switch (tx_type) { + case IDTX: tx_type_1d = IDTX_1D; break; + case V_DCT: + assert(mode == H_PRED); + tx_type_1d = DCT_1D; + break; + case H_DCT: + assert(mode == V_PRED); + tx_type_1d = DCT_1D; + break; + default: assert(0); + } + switch (mode) { + case V_PRED: +#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + hbd_process_block_dpcm_vert(tx_size, tx_type_1d, xd->bd, pd, p, src, + src_stride, dst, dst_stride, src_diff, + diff_stride, coeff, qcoeff, dqcoeff); + } else { +#endif // CONFIG_HIGHBITDEPTH + process_block_dpcm_vert(tx_size, tx_type_1d, pd, p, src, src_stride, + dst, dst_stride, src_diff, diff_stride, coeff, + qcoeff, dqcoeff); +#if CONFIG_HIGHBITDEPTH + } +#endif // CONFIG_HIGHBITDEPTH + break; + case H_PRED: +#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + hbd_process_block_dpcm_horz(tx_size, tx_type_1d, xd->bd, pd, p, src, + src_stride, dst, dst_stride, src_diff, + diff_stride, coeff, qcoeff, dqcoeff); + } else { +#endif // CONFIG_HIGHBITDEPTH + process_block_dpcm_horz(tx_size, tx_type_1d, pd, p, src, src_stride, + dst, dst_stride, src_diff, diff_stride, coeff, + qcoeff, dqcoeff); +#if CONFIG_HIGHBITDEPTH + } +#endif // CONFIG_HIGHBITDEPTH + break; + default: assert(0); + } + *eob = get_eob(qcoeff, tx1d_height * tx1d_width, scan_order->scan); + } + + ta[blk_col] = tl[blk_row] = *eob > 0; + if (*eob) *skip = 0; +} +#endif // CONFIG_DPCM_INTRA + void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { @@ -1449,7 +1716,33 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col, const int dst_stride = pd->dst.stride; uint8_t *dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]]; +#if CONFIG_CFL + +#if CONFIG_EC_ADAPT + FRAME_CONTEXT *const ec_ctx = xd->tile_ctx; +#else + FRAME_CONTEXT *const ec_ctx = cm->fc; +#endif // CONFIG_EC_ADAPT + + av1_predict_intra_block_encoder_facade(x, ec_ctx, plane, block, blk_col, + 
blk_row, tx_size, plane_bsize); +#else av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size); +#endif + +#if CONFIG_DPCM_INTRA + const int block_raster_idx = av1_block_index_to_raster_order(tx_size, block); + const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + const PREDICTION_MODE mode = + (plane == 0) ? get_y_mode(xd->mi[0], block_raster_idx) : mbmi->uv_mode; + if (av1_use_dpcm_intra(plane, mode, tx_type, mbmi)) { + av1_encode_block_intra_dpcm(cm, x, mode, plane, block, blk_row, blk_col, + plane_bsize, tx_size, tx_type, args->ta, + args->tl, args->skip); + return; + } +#endif // CONFIG_DPCM_INTRA + av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size); const ENTROPY_CONTEXT *a = &args->ta[blk_col]; @@ -1458,9 +1751,7 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col, if (args->enable_optimize_b) { av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, ctx, AV1_XFORM_QUANT_FP); - if (p->eobs[block]) { - av1_optimize_b(cm, x, plane, block, tx_size, ctx); - } + av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l); } else { av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, ctx, AV1_XFORM_QUANT_B); @@ -1480,16 +1771,216 @@ void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col, // Note : *(args->skip) == mbmi->skip #endif #if CONFIG_CFL + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; if (plane == AOM_PLANE_Y && x->cfl_store_y) { cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size); } + + if (mbmi->uv_mode == DC_PRED) { + // TODO(ltrudeau) find a cleaner way to detect last transform block + if (plane == AOM_PLANE_U) { + xd->cfl->num_tx_blk[CFL_PRED_U] = + (blk_row == 0 && blk_col == 0) ? 1 + : xd->cfl->num_tx_blk[CFL_PRED_U] + 1; + } + + if (plane == AOM_PLANE_V) { + xd->cfl->num_tx_blk[CFL_PRED_V] = + (blk_row == 0 && blk_col == 0) ? 
1 + : xd->cfl->num_tx_blk[CFL_PRED_V] + 1; + + if (mbmi->skip && + xd->cfl->num_tx_blk[CFL_PRED_U] == xd->cfl->num_tx_blk[CFL_PRED_V]) { + assert(plane_bsize != BLOCK_INVALID); + const int block_width = block_size_wide[plane_bsize]; + const int block_height = block_size_high[plane_bsize]; + + // if SKIP is chosen at the block level, and ind != 0, we must change + // the prediction + if (mbmi->cfl_alpha_idx != 0) { + const struct macroblockd_plane *const pd_cb = &xd->plane[AOM_PLANE_U]; + uint8_t *const dst_cb = pd_cb->dst.buf; + const int dst_stride_cb = pd_cb->dst.stride; + uint8_t *const dst_cr = pd->dst.buf; + const int dst_stride_cr = pd->dst.stride; + for (int j = 0; j < block_height; j++) { + for (int i = 0; i < block_width; i++) { + dst_cb[dst_stride_cb * j + i] = + (uint8_t)(xd->cfl->dc_pred[CFL_PRED_U] + 0.5); + dst_cr[dst_stride_cr * j + i] = + (uint8_t)(xd->cfl->dc_pred[CFL_PRED_V] + 0.5); + } + } + mbmi->cfl_alpha_idx = 0; + mbmi->cfl_alpha_signs[CFL_PRED_U] = CFL_SIGN_POS; + mbmi->cfl_alpha_signs[CFL_PRED_V] = CFL_SIGN_POS; + } + } + } + } #endif } +#if CONFIG_CFL +static int cfl_alpha_dist(const uint8_t *y_pix, int y_stride, double y_avg, + const uint8_t *src, int src_stride, int blk_width, + int blk_height, double dc_pred, double alpha, + int *dist_neg_out) { + const double dc_pred_bias = dc_pred + 0.5; + int dist = 0; + int diff; + + if (alpha == 0.0) { + const int dc_pred_i = (int)dc_pred_bias; + for (int j = 0; j < blk_height; j++) { + for (int i = 0; i < blk_width; i++) { + diff = src[i] - dc_pred_i; + dist += diff * diff; + } + src += src_stride; + } + + if (dist_neg_out) *dist_neg_out = dist; + + return dist; + } + + int dist_neg = 0; + for (int j = 0; j < blk_height; j++) { + for (int i = 0; i < blk_width; i++) { + const double scaled_luma = alpha * (y_pix[i] - y_avg); + const int uv = src[i]; + diff = uv - (int)(scaled_luma + dc_pred_bias); + dist += diff * diff; + diff = uv + (int)(scaled_luma - dc_pred_bias); + dist_neg += diff * diff; + } + y_pix += y_stride; + src += src_stride; + } + + if (dist_neg_out) *dist_neg_out = dist_neg; + + return dist; +} + +static int cfl_compute_alpha_ind(MACROBLOCK *const x, const CFL_CTX *const cfl, + BLOCK_SIZE bsize, + CFL_SIGN_TYPE signs_out[CFL_SIGNS]) { + const struct macroblock_plane *const p_u = &x->plane[AOM_PLANE_U]; + const struct macroblock_plane *const p_v = &x->plane[AOM_PLANE_V]; + const uint8_t *const src_u = p_u->src.buf; + const uint8_t *const src_v = p_v->src.buf; + const int src_stride_u = p_u->src.stride; + const int src_stride_v = p_v->src.stride; + const int block_width = block_size_wide[bsize]; + const int block_height = block_size_high[bsize]; + const double dc_pred_u = cfl->dc_pred[CFL_PRED_U]; + const double dc_pred_v = cfl->dc_pred[CFL_PRED_V]; + + // Temporary pixel buffer used to store the CfL prediction when we compute the + // alpha index. 
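cfl_alpha_dist() above evaluates one alpha magnitude for both signs in a single pass over the block: the prediction with -alpha is just dc_pred minus the same scaled-luma term, so the two SSEs can share the per-pixel computation. A small self-contained sketch of that idea on toy floating-point data (the 0.5 bias and integer truncation of the real function are simplified away):

#include <stdio.h>

/* Returns SSE of chroma against (dc + alpha * (luma - avg)) and reports the
 * SSE of the opposite sign, (dc - alpha * (luma - avg)), through *sse_neg,
 * reusing the scaled-luma term computed for each pixel. */
static double alpha_dist_sketch(const double *luma, const double *chroma,
                                int n, double avg, double dc, double alpha,
                                double *sse_neg) {
  double pos = 0.0, neg = 0.0;
  for (int i = 0; i < n; ++i) {
    const double scaled = alpha * (luma[i] - avg);
    const double dp = chroma[i] - (dc + scaled);
    const double dn = chroma[i] - (dc - scaled);
    pos += dp * dp;
    neg += dn * dn;
  }
  *sse_neg = neg;
  return pos;
}

int main(void) {
  const double luma[4] = { 100, 110, 120, 130 };
  const double chroma[4] = { 60, 64, 68, 72 };
  double neg;
  const double pos =
      alpha_dist_sketch(luma, chroma, 4, /*avg=*/115.0, /*dc=*/66.0, 0.5, &neg);
  printf("SSE(+alpha) = %.1f, SSE(-alpha) = %.1f\n", pos, neg);
  return 0;
}

The caller then RD-costs each (magnitude, sign) pair against the signalling cost and keeps the cheapest index, as cfl_compute_alpha_ind() does below.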
+ uint8_t tmp_pix[MAX_SB_SQUARE]; + // Load CfL Prediction over the entire block + const double y_avg = + cfl_load(cfl, tmp_pix, MAX_SB_SIZE, 0, 0, block_width, block_height); + + int sse[CFL_PRED_PLANES][CFL_MAGS_SIZE]; + sse[CFL_PRED_U][0] = + cfl_alpha_dist(tmp_pix, MAX_SB_SIZE, y_avg, src_u, src_stride_u, + block_width, block_height, dc_pred_u, 0, NULL); + sse[CFL_PRED_V][0] = + cfl_alpha_dist(tmp_pix, MAX_SB_SIZE, y_avg, src_v, src_stride_v, + block_width, block_height, dc_pred_v, 0, NULL); + for (int m = 1; m < CFL_MAGS_SIZE; m += 2) { + assert(cfl_alpha_mags[m + 1] == -cfl_alpha_mags[m]); + sse[CFL_PRED_U][m] = cfl_alpha_dist( + tmp_pix, MAX_SB_SIZE, y_avg, src_u, src_stride_u, block_width, + block_height, dc_pred_u, cfl_alpha_mags[m], &sse[CFL_PRED_U][m + 1]); + sse[CFL_PRED_V][m] = cfl_alpha_dist( + tmp_pix, MAX_SB_SIZE, y_avg, src_v, src_stride_v, block_width, + block_height, dc_pred_v, cfl_alpha_mags[m], &sse[CFL_PRED_V][m + 1]); + } + + int dist; + int64_t cost; + int64_t best_cost; + + // Compute least squares parameter of the entire block + // IMPORTANT: We assume that the first code is 0,0 + int ind = 0; + signs_out[CFL_PRED_U] = CFL_SIGN_POS; + signs_out[CFL_PRED_V] = CFL_SIGN_POS; + + dist = sse[CFL_PRED_U][0] + sse[CFL_PRED_V][0]; + dist *= 16; + best_cost = RDCOST(x->rdmult, x->rddiv, cfl->costs[0], dist); + + for (int c = 1; c < CFL_ALPHABET_SIZE; c++) { + const int idx_u = cfl_alpha_codes[c][CFL_PRED_U]; + const int idx_v = cfl_alpha_codes[c][CFL_PRED_V]; + for (CFL_SIGN_TYPE sign_u = idx_u == 0; sign_u < CFL_SIGNS; sign_u++) { + for (CFL_SIGN_TYPE sign_v = idx_v == 0; sign_v < CFL_SIGNS; sign_v++) { + dist = sse[CFL_PRED_U][idx_u + (sign_u == CFL_SIGN_NEG)] + + sse[CFL_PRED_V][idx_v + (sign_v == CFL_SIGN_NEG)]; + dist *= 16; + cost = RDCOST(x->rdmult, x->rddiv, cfl->costs[c], dist); + if (cost < best_cost) { + best_cost = cost; + ind = c; + signs_out[CFL_PRED_U] = sign_u; + signs_out[CFL_PRED_V] = sign_v; + } + } + } + } + + return ind; +} + +static inline void cfl_update_costs(CFL_CTX *cfl, FRAME_CONTEXT *ec_ctx) { + assert(ec_ctx->cfl_alpha_cdf[CFL_ALPHABET_SIZE - 1] == + AOM_ICDF(CDF_PROB_TOP)); + const int prob_den = CDF_PROB_TOP; + + int prob_num = AOM_ICDF(ec_ctx->cfl_alpha_cdf[0]); + cfl->costs[0] = av1_cost_zero(get_prob(prob_num, prob_den)); + + for (int c = 1; c < CFL_ALPHABET_SIZE; c++) { + int sign_bit_cost = (cfl_alpha_codes[c][CFL_PRED_U] != 0) + + (cfl_alpha_codes[c][CFL_PRED_V] != 0); + prob_num = AOM_ICDF(ec_ctx->cfl_alpha_cdf[c]) - + AOM_ICDF(ec_ctx->cfl_alpha_cdf[c - 1]); + cfl->costs[c] = av1_cost_zero(get_prob(prob_num, prob_den)) + + av1_cost_literal(sign_bit_cost); + } +} + +void av1_predict_intra_block_encoder_facade(MACROBLOCK *x, + FRAME_CONTEXT *ec_ctx, int plane, + int block_idx, int blk_col, + int blk_row, TX_SIZE tx_size, + BLOCK_SIZE plane_bsize) { + MACROBLOCKD *const xd = &x->e_mbd; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + if (plane != AOM_PLANE_Y && mbmi->uv_mode == DC_PRED) { + if (blk_col == 0 && blk_row == 0 && plane == AOM_PLANE_U) { + CFL_CTX *const cfl = xd->cfl; + cfl_update_costs(cfl, ec_ctx); + cfl_dc_pred(xd, plane_bsize, tx_size); + mbmi->cfl_alpha_idx = + cfl_compute_alpha_ind(x, cfl, plane_bsize, mbmi->cfl_alpha_signs); + } + } + av1_predict_intra_block_facade(xd, plane, block_idx, blk_col, blk_row, + tx_size); +} +#endif + void av1_encode_intra_block_plane(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize, int plane, - int enable_optimize_b, const int mi_row, - const int mi_col) { + int enable_optimize_b, int mi_row, + 
int mi_col) { const MACROBLOCKD *const xd = &x->e_mbd; ENTROPY_CONTEXT ta[2 * MAX_MIB_SIZE] = { 0 }; ENTROPY_CONTEXT tl[2 * MAX_MIB_SIZE] = { 0 }; @@ -1545,9 +2036,7 @@ PVQ_SKIP_TYPE av1_pvq_encode_helper(MACROBLOCK *x, tran_low_t *const coeff, DECLARE_ALIGNED(16, int32_t, ref_int32[OD_TXSIZE_MAX * OD_TXSIZE_MAX]); DECLARE_ALIGNED(16, int32_t, out_int32[OD_TXSIZE_MAX * OD_TXSIZE_MAX]); -#if CONFIG_HIGHBITDEPTH hbd_downshift = x->e_mbd.bd - 8; -#endif assert(OD_COEFF_SHIFT >= 4); // DC quantizer for PVQ @@ -1563,10 +2052,10 @@ PVQ_SKIP_TYPE av1_pvq_encode_helper(MACROBLOCK *x, tran_low_t *const coeff, *eob = 0; -#if CONFIG_DAALA_EC +#if !CONFIG_ANS tell = od_ec_enc_tell_frac(&daala_enc->w.ec); #else -#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC." +#error "CONFIG_PVQ currently requires !CONFIG_ANS." #endif // Change coefficient ordering for pvq encoding. @@ -1635,11 +2124,11 @@ PVQ_SKIP_TYPE av1_pvq_encode_helper(MACROBLOCK *x, tran_low_t *const coeff, *eob = tx_blk_size * tx_blk_size; -#if CONFIG_DAALA_EC +#if !CONFIG_ANS *rate = (od_ec_enc_tell_frac(&daala_enc->w.ec) - tell) << (AV1_PROB_COST_SHIFT - OD_BITRES); #else -#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC." +#error "CONFIG_PVQ currently requires !CONFIG_ANS." #endif assert(*rate >= 0); diff --git a/third_party/aom/av1/encoder/encodemb.h b/third_party/aom/av1/encoder/encodemb.h index 73fde1d88..35a2c1570 100644 --- a/third_party/aom/av1/encoder/encodemb.h +++ b/third_party/aom/av1/encoder/encodemb.h @@ -54,7 +54,8 @@ void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block, TX_SIZE tx_size, int ctx, AV1_XFORM_QUANT xform_quant_idx); int av1_optimize_b(const AV1_COMMON *cm, MACROBLOCK *mb, int plane, int block, - TX_SIZE tx_size, int ctx); + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, + const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l); void av1_subtract_txb(MACROBLOCK *x, int plane, BLOCK_SIZE plane_bsize, int blk_col, int blk_row, TX_SIZE tx_size); @@ -85,6 +86,23 @@ void av1_store_pvq_enc_info(PVQ_INFO *pvq_info, int *qg, int *theta, int *k, int *size, int skip_rest, int skip_dir, int bs); #endif +#if CONFIG_CFL +void av1_predict_intra_block_encoder_facade(MACROBLOCK *x, + FRAME_CONTEXT *ec_ctx, int plane, + int block_idx, int blk_col, + int blk_row, TX_SIZE tx_size, + BLOCK_SIZE plane_bsize); +#endif + +#if CONFIG_DPCM_INTRA +void av1_encode_block_intra_dpcm(const AV1_COMMON *cm, MACROBLOCK *x, + PREDICTION_MODE mode, int plane, int block, + int blk_row, int blk_col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, + TX_TYPE tx_type, ENTROPY_CONTEXT *ta, + ENTROPY_CONTEXT *tl, int8_t *skip); +#endif // CONFIG_DPCM_INTRA + #ifdef __cplusplus } // extern "C" #endif diff --git a/third_party/aom/av1/encoder/encodemv.c b/third_party/aom/av1/encoder/encodemv.c index a2a53f840..eb0ff88c4 100644 --- a/third_party/aom/av1/encoder/encodemv.c +++ b/third_party/aom/av1/encoder/encodemv.c @@ -45,13 +45,8 @@ static void encode_mv_component(aom_writer *w, int comp, nmv_component *mvcomp, // Sign aom_write(w, sign, mvcomp->sign); -// Class -#if CONFIG_EC_MULTISYMBOL + // Class aom_write_symbol(w, mv_class, mvcomp->class_cdf, MV_CLASSES); -#else - av1_write_token(w, av1_mv_class_tree, mvcomp->classes, - &mv_class_encodings[mv_class]); -#endif // Integer bits if (mv_class == MV_CLASS_0) { @@ -62,16 +57,10 @@ static void encode_mv_component(aom_writer *w, int comp, nmv_component *mvcomp, for (i = 0; i < n; ++i) aom_write(w, (d >> i) & 1, mvcomp->bits[i]); } -// Fractional bits -#if CONFIG_EC_MULTISYMBOL + // 
Fractional bits aom_write_symbol( w, fr, mv_class == MV_CLASS_0 ? mvcomp->class0_fp_cdf[d] : mvcomp->fp_cdf, MV_FP_SIZE); -#else - av1_write_token(w, av1_mv_fp_tree, - mv_class == MV_CLASS_0 ? mvcomp->class0_fp[d] : mvcomp->fp, - &mv_fp_encodings[fr]); -#endif // High precision bit if (usehp) @@ -171,7 +160,6 @@ static void write_mv_update(const aom_tree_index *tree, void av1_write_nmv_probs(AV1_COMMON *cm, int usehp, aom_writer *w, nmv_context_counts *const nmv_counts) { int i; -#if CONFIG_REF_MV int nmv_ctx = 0; for (nmv_ctx = 0; nmv_ctx < NMV_CONTEXTS; ++nmv_ctx) { nmv_context *const mvc = &cm->fc->nmvc[nmv_ctx]; @@ -213,57 +201,13 @@ void av1_write_nmv_probs(AV1_COMMON *cm, int usehp, aom_writer *w, } } } -#else - nmv_context *const mvc = &cm->fc->nmvc; - nmv_context_counts *const counts = nmv_counts; - -#if !CONFIG_EC_ADAPT - write_mv_update(av1_mv_joint_tree, mvc->joints, counts->joints, MV_JOINTS, w); - - for (i = 0; i < 2; ++i) { - int j; - nmv_component *comp = &mvc->comps[i]; - nmv_component_counts *comp_counts = &counts->comps[i]; - - update_mv(w, comp_counts->sign, &comp->sign, MV_UPDATE_PROB); - write_mv_update(av1_mv_class_tree, comp->classes, comp_counts->classes, - MV_CLASSES, w); - write_mv_update(av1_mv_class0_tree, comp->class0, comp_counts->class0, - CLASS0_SIZE, w); - for (j = 0; j < MV_OFFSET_BITS; ++j) - update_mv(w, comp_counts->bits[j], &comp->bits[j], MV_UPDATE_PROB); - } - - for (i = 0; i < 2; ++i) { - int j; - for (j = 0; j < CLASS0_SIZE; ++j) { - write_mv_update(av1_mv_fp_tree, mvc->comps[i].class0_fp[j], - counts->comps[i].class0_fp[j], MV_FP_SIZE, w); - } - write_mv_update(av1_mv_fp_tree, mvc->comps[i].fp, counts->comps[i].fp, - MV_FP_SIZE, w); - } -#endif // !CONFIG_EC_ADAPT - - if (usehp) { - for (i = 0; i < 2; ++i) { - update_mv(w, counts->comps[i].class0_hp, &mvc->comps[i].class0_hp, - MV_UPDATE_PROB); - update_mv(w, counts->comps[i].hp, &mvc->comps[i].hp, MV_UPDATE_PROB); - } - } -#endif } void av1_encode_mv(AV1_COMP *cpi, aom_writer *w, const MV *mv, const MV *ref, nmv_context *mvctx, int usehp) { const MV diff = { mv->row - ref->row, mv->col - ref->col }; const MV_JOINT_TYPE j = av1_get_mv_joint(&diff); -#if CONFIG_EC_MULTISYMBOL aom_write_symbol(w, j, mvctx->joint_cdf, MV_JOINTS); -#else - av1_write_token(w, av1_mv_joint_tree, mvctx->joints, &mv_joint_encodings[j]); -#endif if (mv_joint_vertical(j)) encode_mv_component(w, diff.row, &mvctx->comps[0], usehp); @@ -284,11 +228,7 @@ void av1_encode_dv(aom_writer *w, const MV *mv, const MV *ref, const MV diff = { mv->row - ref->row, mv->col - ref->col }; const MV_JOINT_TYPE j = av1_get_mv_joint(&diff); -#if CONFIG_EC_MULTISYMBOL aom_write_symbol(w, j, mvctx->joint_cdf, MV_JOINTS); -#else - av1_write_token(w, av1_mv_joint_tree, mvctx->joints, &mv_joint_encodings[j]); -#endif if (mv_joint_vertical(j)) encode_mv_component(w, diff.row, &mvctx->comps[0], 0); @@ -306,135 +246,101 @@ void av1_build_nmv_cost_table(int *mvjoint, int *mvcost[2], #if CONFIG_EXT_INTER static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext, - const int_mv mvs[2], -#if CONFIG_REF_MV - const int_mv pred_mvs[2], -#endif + const int_mv mvs[2], const int_mv pred_mvs[2], nmv_context_counts *nmv_counts) { int i; PREDICTION_MODE mode = mbmi->mode; -#if !CONFIG_REF_MV - nmv_context_counts *counts = nmv_counts; -#endif if (mode == NEWMV || mode == NEW_NEWMV) { for (i = 0; i < 1 + has_second_ref(mbmi); ++i) { const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0].as_mv; const MV diff = { mvs[i].as_mv.row - ref->row, 
mvs[i].as_mv.col - ref->col }; -#if CONFIG_REF_MV int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame); int nmv_ctx = av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type], mbmi_ext->ref_mv_stack[rf_type], i, mbmi->ref_mv_idx); nmv_context_counts *counts = &nmv_counts[nmv_ctx]; (void)pred_mvs; -#endif av1_inc_mv(&diff, counts, 1); } } else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) { const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0].as_mv; const MV diff = { mvs[1].as_mv.row - ref->row, mvs[1].as_mv.col - ref->col }; -#if CONFIG_REF_MV int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame); int nmv_ctx = av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type], mbmi_ext->ref_mv_stack[rf_type], 1, mbmi->ref_mv_idx); nmv_context_counts *counts = &nmv_counts[nmv_ctx]; -#endif av1_inc_mv(&diff, counts, 1); } else if (mode == NEW_NEARESTMV || mode == NEW_NEARMV) { const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_mv; const MV diff = { mvs[0].as_mv.row - ref->row, mvs[0].as_mv.col - ref->col }; -#if CONFIG_REF_MV int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame); int nmv_ctx = av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type], mbmi_ext->ref_mv_stack[rf_type], 0, mbmi->ref_mv_idx); nmv_context_counts *counts = &nmv_counts[nmv_ctx]; -#endif av1_inc_mv(&diff, counts, 1); } } static void inc_mvs_sub8x8(const MODE_INFO *mi, int block, const int_mv mvs[2], -#if CONFIG_REF_MV const MB_MODE_INFO_EXT *mbmi_ext, -#endif nmv_context_counts *nmv_counts) { int i; PREDICTION_MODE mode = mi->bmi[block].as_mode; -#if CONFIG_REF_MV const MB_MODE_INFO *mbmi = &mi->mbmi; -#else - nmv_context_counts *counts = nmv_counts; -#endif if (mode == NEWMV || mode == NEW_NEWMV) { for (i = 0; i < 1 + has_second_ref(&mi->mbmi); ++i) { const MV *ref = &mi->bmi[block].ref_mv[i].as_mv; const MV diff = { mvs[i].as_mv.row - ref->row, mvs[i].as_mv.col - ref->col }; -#if CONFIG_REF_MV int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame); int nmv_ctx = av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type], mbmi_ext->ref_mv_stack[rf_type], i, mbmi->ref_mv_idx); nmv_context_counts *counts = &nmv_counts[nmv_ctx]; -#endif av1_inc_mv(&diff, counts, 1); } } else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) { const MV *ref = &mi->bmi[block].ref_mv[1].as_mv; const MV diff = { mvs[1].as_mv.row - ref->row, mvs[1].as_mv.col - ref->col }; -#if CONFIG_REF_MV int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame); int nmv_ctx = av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type], mbmi_ext->ref_mv_stack[rf_type], 1, mbmi->ref_mv_idx); nmv_context_counts *counts = &nmv_counts[nmv_ctx]; -#endif av1_inc_mv(&diff, counts, 1); } else if (mode == NEW_NEARESTMV || mode == NEW_NEARMV) { const MV *ref = &mi->bmi[block].ref_mv[0].as_mv; const MV diff = { mvs[0].as_mv.row - ref->row, mvs[0].as_mv.col - ref->col }; -#if CONFIG_REF_MV int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame); int nmv_ctx = av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type], mbmi_ext->ref_mv_stack[rf_type], 0, mbmi->ref_mv_idx); nmv_context_counts *counts = &nmv_counts[nmv_ctx]; -#endif av1_inc_mv(&diff, counts, 1); } } #else static void inc_mvs(const MB_MODE_INFO *mbmi, const MB_MODE_INFO_EXT *mbmi_ext, - const int_mv mvs[2], -#if CONFIG_REF_MV - const int_mv pred_mvs[2], -#endif + const int_mv mvs[2], const int_mv pred_mvs[2], nmv_context_counts *nmv_counts) { int i; -#if !CONFIG_REF_MV - nmv_context_counts *counts = nmv_counts; -#endif for (i = 0; i < 1 + has_second_ref(mbmi); ++i) { -#if CONFIG_REF_MV int8_t rf_type = av1_ref_frame_type(mbmi->ref_frame); int nmv_ctx = 
av1_nmv_ctx(mbmi_ext->ref_mv_count[rf_type], mbmi_ext->ref_mv_stack[rf_type], i, mbmi->ref_mv_idx); nmv_context_counts *counts = &nmv_counts[nmv_ctx]; const MV *ref = &pred_mvs[i].as_mv; -#else - const MV *ref = &mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0].as_mv; -#endif const MV diff = { mvs[i].as_mv.row - ref->row, mvs[i].as_mv.col - ref->col }; av1_inc_mv(&diff, counts, 1); @@ -464,20 +370,11 @@ void av1_update_mv_count(ThreadData *td) { #if CONFIG_EXT_INTER if (have_newmv_in_inter_mode(mi->bmi[i].as_mode)) - inc_mvs_sub8x8(mi, i, mi->bmi[i].as_mv, -#if CONFIG_REF_MV - mbmi_ext, td->counts->mv); -#else - &td->counts->mv); -#endif + inc_mvs_sub8x8(mi, i, mi->bmi[i].as_mv, mbmi_ext, td->counts->mv); #else if (mi->bmi[i].as_mode == NEWMV) - inc_mvs(mbmi, mbmi_ext, mi->bmi[i].as_mv, -#if CONFIG_REF_MV - mi->bmi[i].pred_mv, td->counts->mv); -#else - &td->counts->mv); -#endif + inc_mvs(mbmi, mbmi_ext, mi->bmi[i].as_mv, mi->bmi[i].pred_mv, + td->counts->mv); #endif // CONFIG_EXT_INTER } } @@ -487,11 +384,6 @@ void av1_update_mv_count(ThreadData *td) { #else if (mbmi->mode == NEWMV) #endif // CONFIG_EXT_INTER - inc_mvs(mbmi, mbmi_ext, mbmi->mv, -#if CONFIG_REF_MV - mbmi->pred_mv, td->counts->mv); -#else - &td->counts->mv); -#endif + inc_mvs(mbmi, mbmi_ext, mbmi->mv, mbmi->pred_mv, td->counts->mv); } } diff --git a/third_party/aom/av1/encoder/encoder.c b/third_party/aom/av1/encoder/encoder.c index 027109151..4782ce2b7 100644 --- a/third_party/aom/av1/encoder/encoder.c +++ b/third_party/aom/av1/encoder/encoder.c @@ -246,29 +246,17 @@ void av1_set_high_precision_mv(AV1_COMP *cpi, int allow_high_precision_mv) { MACROBLOCK *const mb = &cpi->td.mb; cpi->common.allow_high_precision_mv = allow_high_precision_mv; -#if CONFIG_REF_MV if (cpi->common.allow_high_precision_mv) { int i; for (i = 0; i < NMV_CONTEXTS; ++i) { mb->mv_cost_stack[i] = mb->nmvcost_hp[i]; - mb->mvsadcost = mb->nmvsadcost_hp; } } else { int i; for (i = 0; i < NMV_CONTEXTS; ++i) { mb->mv_cost_stack[i] = mb->nmvcost[i]; - mb->mvsadcost = mb->nmvsadcost; } } -#else - if (cpi->common.allow_high_precision_mv) { - mb->mvcost = mb->nmvcost_hp; - mb->mvsadcost = mb->nmvcost_hp; - } else { - mb->mvcost = mb->nmvcost; - mb->mvsadcost = mb->nmvcost; - } -#endif } static BLOCK_SIZE select_sb_size(const AV1_COMP *const cpi) { @@ -334,13 +322,14 @@ static void setup_frame(AV1_COMP *cpi) { av1_zero(cpi->interp_filter_selected[0]); } #if CONFIG_EXT_REFS -#if CONFIG_LOWDELAY_COMPOUND // No change to bitstream +#if CONFIG_ONE_SIDED_COMPOUND // No change to bitstream if (cpi->sf.recode_loop == DISALLOW_RECODE) { cpi->refresh_bwd_ref_frame = cpi->refresh_last_frame; cpi->rc.is_bipred_frame = 1; } #endif #endif + cm->pre_fc = &cm->frame_contexts[cm->frame_context_idx]; cpi->vaq_refresh = 0; @@ -464,6 +453,20 @@ static void dealloc_compressor_data(AV1_COMP *cpi) { aom_free(cpi->active_map.map); cpi->active_map.map = NULL; +#if CONFIG_MOTION_VAR + aom_free(cpi->td.mb.above_pred_buf); + cpi->td.mb.above_pred_buf = NULL; + + aom_free(cpi->td.mb.left_pred_buf); + cpi->td.mb.left_pred_buf = NULL; + + aom_free(cpi->td.mb.wsrc_buf); + cpi->td.mb.wsrc_buf = NULL; + + aom_free(cpi->td.mb.mask_buf); + cpi->td.mb.mask_buf = NULL; +#endif + // Free up-sampled reference buffers. 
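The buffer teardown added to dealloc_compressor_data() above follows the usual aom_free()-then-NULL pattern so a repeated call, or cleanup after a partially failed init, stays harmless. A generic sketch of the same idiom with plain malloc/free and a hypothetical context struct (the member names are illustrative, not the encoder's):

#include <stdlib.h>

struct enc_bufs {
  unsigned char *above_pred;
  unsigned char *left_pred;
  int *wsrc;
};

/* free() tolerates NULL, and resetting the pointers keeps the struct in a
 * state where this function can safely run again. */
static void free_enc_bufs(struct enc_bufs *b) {
  free(b->above_pred);
  b->above_pred = NULL;
  free(b->left_pred);
  b->left_pred = NULL;
  free(b->wsrc);
  b->wsrc = NULL;
}

int main(void) {
  struct enc_bufs b = { malloc(1024), malloc(1024), malloc(256 * sizeof(int)) };
  free_enc_bufs(&b);
  free_enc_bufs(&b); /* second call is a no-op */
  return 0;
}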
for (i = 0; i < (REF_FRAMES + 1); i++) aom_free_frame_buffer(&cpi->upsampled_ref_bufs[i].buf); @@ -492,17 +495,12 @@ static void dealloc_compressor_data(AV1_COMP *cpi) { cpi->tile_tok[0][0] = 0; av1_free_pc_tree(&cpi->td); - av1_free_var_tree(&cpi->td); #if CONFIG_PALETTE if (cpi->common.allow_screen_content_tools) aom_free(cpi->td.mb.palette_buffer); #endif // CONFIG_PALETTE - if (cpi->source_diff_var != NULL) { - aom_free(cpi->source_diff_var); - cpi->source_diff_var = NULL; - } #if CONFIG_ANS aom_buf_ans_free(&cpi->buf_ans); #endif // CONFIG_ANS @@ -511,26 +509,17 @@ static void dealloc_compressor_data(AV1_COMP *cpi) { static void save_coding_context(AV1_COMP *cpi) { CODING_CONTEXT *const cc = &cpi->coding_context; AV1_COMMON *cm = &cpi->common; -#if CONFIG_REF_MV int i; -#endif -// Stores a snapshot of key state variables which can subsequently be -// restored with a call to av1_restore_coding_context. These functions are -// intended for use in a re-code loop in av1_compress_frame where the -// quantizer value is adjusted between loop iterations. -#if CONFIG_REF_MV + // Stores a snapshot of key state variables which can subsequently be + // restored with a call to av1_restore_coding_context. These functions are + // intended for use in a re-code loop in av1_compress_frame where the + // quantizer value is adjusted between loop iterations. for (i = 0; i < NMV_CONTEXTS; ++i) { av1_copy(cc->nmv_vec_cost[i], cpi->td.mb.nmv_vec_cost[i]); av1_copy(cc->nmv_costs, cpi->nmv_costs); av1_copy(cc->nmv_costs_hp, cpi->nmv_costs_hp); } -#else - av1_copy(cc->nmvjointcost, cpi->td.mb.nmvjointcost); -#endif - - av1_copy(cc->nmvcosts, cpi->nmvcosts); - av1_copy(cc->nmvcosts_hp, cpi->nmvcosts_hp); av1_copy(cc->last_ref_lf_deltas, cm->lf.last_ref_deltas); av1_copy(cc->last_mode_lf_deltas, cm->lf.last_mode_deltas); @@ -541,24 +530,15 @@ static void save_coding_context(AV1_COMP *cpi) { static void restore_coding_context(AV1_COMP *cpi) { CODING_CONTEXT *const cc = &cpi->coding_context; AV1_COMMON *cm = &cpi->common; -#if CONFIG_REF_MV int i; -#endif -// Restore key state variables to the snapshot state stored in the -// previous call to av1_save_coding_context. -#if CONFIG_REF_MV + // Restore key state variables to the snapshot state stored in the + // previous call to av1_save_coding_context. 
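save_coding_context()/restore_coding_context() above snapshot the adaptive MV cost tables so a recode loop can retry the frame at a different quantizer without the discarded trial polluting the encoder state. A stripped-down sketch of that snapshot/restore pattern wrapped around a trial encode; the cost array, the encode step, and the size model are placeholders, not the real recode logic:

#include <string.h>

#define NCTX 3
#define NCOSTS 8

struct coding_ctx {
  int mv_costs[NCTX][NCOSTS];
};

static void save_ctx(struct coding_ctx *snap, const struct coding_ctx *live) {
  memcpy(snap, live, sizeof(*snap));
}

static void restore_ctx(struct coding_ctx *live, const struct coding_ctx *snap) {
  memcpy(live, snap, sizeof(*live));
}

/* Placeholder encode pass: mutates the adaptive costs, returns a fake size. */
static int trial_encode(struct coding_ctx *live, int q) {
  live->mv_costs[0][0] += q;
  return 1000 - q;
}

static int encode_with_recode(struct coding_ctx *live, int max_size) {
  struct coding_ctx snap;
  int q = 40;
  for (;;) {
    save_ctx(&snap, live);
    const int size = trial_encode(live, q);
    if (size <= max_size || q >= 63) return size;
    /* Frame too large: roll back the adapted state and retry at higher q. */
    restore_ctx(live, &snap);
    q += 4;
  }
}

int main(void) {
  struct coding_ctx live = { { { 0 } } };
  return encode_with_recode(&live, 940) > 0 ? 0 : 1;
}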
for (i = 0; i < NMV_CONTEXTS; ++i) { av1_copy(cpi->td.mb.nmv_vec_cost[i], cc->nmv_vec_cost[i]); av1_copy(cpi->nmv_costs, cc->nmv_costs); av1_copy(cpi->nmv_costs_hp, cc->nmv_costs_hp); } -#else - av1_copy(cpi->td.mb.nmvjointcost, cc->nmvjointcost); -#endif - - av1_copy(cpi->nmvcosts, cc->nmvcosts); - av1_copy(cpi->nmvcosts_hp, cc->nmvcosts_hp); av1_copy(cm->lf.last_ref_deltas, cc->last_ref_lf_deltas); av1_copy(cm->lf.last_mode_deltas, cc->last_mode_lf_deltas); @@ -795,14 +775,12 @@ static void alloc_util_frame_buffers(AV1_COMP *cpi) { "Failed to allocate scaled last source buffer"); } -static int alloc_context_buffers_ext(AV1_COMP *cpi) { +static void alloc_context_buffers_ext(AV1_COMP *cpi) { AV1_COMMON *cm = &cpi->common; int mi_size = cm->mi_cols * cm->mi_rows; - cpi->mbmi_ext_base = aom_calloc(mi_size, sizeof(*cpi->mbmi_ext_base)); - if (!cpi->mbmi_ext_base) return 1; - - return 0; + CHECK_MEM_ERROR(cm, cpi->mbmi_ext_base, + aom_calloc(mi_size, sizeof(*cpi->mbmi_ext_base))); } void av1_alloc_compressor_data(AV1_COMP *cpi) { @@ -902,7 +880,11 @@ static void set_tile_info(AV1_COMP *cpi) { #if CONFIG_DEPENDENT_HORZTILES cm->dependent_horz_tiles = cpi->oxcf.dependent_horz_tiles; +#if CONFIG_EXT_TILE + if (cm->tile_rows <= 1) cm->dependent_horz_tiles = 0; +#else if (cm->log2_tile_rows == 0) cm->dependent_horz_tiles = 0; +#endif #if CONFIG_TILE_GROUPS if (cpi->oxcf.mtu == 0) { cm->num_tg = cpi->oxcf.num_tile_groups; @@ -1194,48 +1176,53 @@ MAKE_BFP_SAD8_WRAPPER(aom_highbd_sad4x4x8) MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad4x4x4d) #if CONFIG_EXT_INTER -#define HIGHBD_MBFP(BT, MSDF, MVF, MSVF) \ - cpi->fn_ptr[BT].msdf = MSDF; \ - cpi->fn_ptr[BT].mvf = MVF; \ - cpi->fn_ptr[BT].msvf = MSVF; - -#define MAKE_MBFP_SAD_WRAPPER(fnname) \ - static unsigned int fnname##_bits8( \ - const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \ - int ref_stride, const uint8_t *m, int m_stride) { \ - return fnname(src_ptr, source_stride, ref_ptr, ref_stride, m, m_stride); \ - } \ - static unsigned int fnname##_bits10( \ - const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \ - int ref_stride, const uint8_t *m, int m_stride) { \ - return fnname(src_ptr, source_stride, ref_ptr, ref_stride, m, m_stride) >> \ - 2; \ - } \ - static unsigned int fnname##_bits12( \ - const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \ - int ref_stride, const uint8_t *m, int m_stride) { \ - return fnname(src_ptr, source_stride, ref_ptr, ref_stride, m, m_stride) >> \ - 4; \ +#define HIGHBD_MBFP(BT, MCSDF, MCSVF) \ + cpi->fn_ptr[BT].msdf = MCSDF; \ + cpi->fn_ptr[BT].msvf = MCSVF; + +#define MAKE_MBFP_COMPOUND_SAD_WRAPPER(fnname) \ + static unsigned int fnname##_bits8( \ + const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \ + int ref_stride, const uint8_t *second_pred_ptr, const uint8_t *m, \ + int m_stride, int invert_mask) { \ + return fnname(src_ptr, source_stride, ref_ptr, ref_stride, \ + second_pred_ptr, m, m_stride, invert_mask); \ + } \ + static unsigned int fnname##_bits10( \ + const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \ + int ref_stride, const uint8_t *second_pred_ptr, const uint8_t *m, \ + int m_stride, int invert_mask) { \ + return fnname(src_ptr, source_stride, ref_ptr, ref_stride, \ + second_pred_ptr, m, m_stride, invert_mask) >> \ + 2; \ + } \ + static unsigned int fnname##_bits12( \ + const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \ + int ref_stride, const uint8_t *second_pred_ptr, const uint8_t *m, \ + int m_stride, int 
invert_mask) { \ + return fnname(src_ptr, source_stride, ref_ptr, ref_stride, \ + second_pred_ptr, m, m_stride, invert_mask) >> \ + 4; \ } #if CONFIG_EXT_PARTITION -MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad128x128) -MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad128x64) -MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad64x128) +MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad128x128) +MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad128x64) +MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad64x128) #endif // CONFIG_EXT_PARTITION -MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad64x64) -MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad64x32) -MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad32x64) -MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad32x32) -MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad32x16) -MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad16x32) -MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad16x16) -MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad16x8) -MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad8x16) -MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad8x8) -MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad8x4) -MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad4x8) -MAKE_MBFP_SAD_WRAPPER(aom_highbd_masked_sad4x4) +MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad64x64) +MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad64x32) +MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad32x64) +MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad32x32) +MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad32x16) +MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad16x32) +MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad16x16) +MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad16x8) +MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad8x16) +MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad8x8) +MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad8x4) +MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad4x8) +MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad4x4) #endif // CONFIG_EXT_INTER #if CONFIG_MOTION_VAR @@ -1401,54 +1388,38 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) { #if CONFIG_EXT_INTER #if CONFIG_EXT_PARTITION HIGHBD_MBFP(BLOCK_128X128, aom_highbd_masked_sad128x128_bits8, - aom_highbd_masked_variance128x128, - aom_highbd_masked_sub_pixel_variance128x128) + aom_highbd_8_masked_sub_pixel_variance128x128) HIGHBD_MBFP(BLOCK_128X64, aom_highbd_masked_sad128x64_bits8, - aom_highbd_masked_variance128x64, - aom_highbd_masked_sub_pixel_variance128x64) + aom_highbd_8_masked_sub_pixel_variance128x64) HIGHBD_MBFP(BLOCK_64X128, aom_highbd_masked_sad64x128_bits8, - aom_highbd_masked_variance64x128, - aom_highbd_masked_sub_pixel_variance64x128) + aom_highbd_8_masked_sub_pixel_variance64x128) #endif // CONFIG_EXT_PARTITION HIGHBD_MBFP(BLOCK_64X64, aom_highbd_masked_sad64x64_bits8, - aom_highbd_masked_variance64x64, - aom_highbd_masked_sub_pixel_variance64x64) + aom_highbd_8_masked_sub_pixel_variance64x64) HIGHBD_MBFP(BLOCK_64X32, aom_highbd_masked_sad64x32_bits8, - aom_highbd_masked_variance64x32, - aom_highbd_masked_sub_pixel_variance64x32) + aom_highbd_8_masked_sub_pixel_variance64x32) HIGHBD_MBFP(BLOCK_32X64, aom_highbd_masked_sad32x64_bits8, - aom_highbd_masked_variance32x64, - aom_highbd_masked_sub_pixel_variance32x64) + aom_highbd_8_masked_sub_pixel_variance32x64) HIGHBD_MBFP(BLOCK_32X32, aom_highbd_masked_sad32x32_bits8, - aom_highbd_masked_variance32x32, - aom_highbd_masked_sub_pixel_variance32x32) + aom_highbd_8_masked_sub_pixel_variance32x32) HIGHBD_MBFP(BLOCK_32X16, aom_highbd_masked_sad32x16_bits8, - 
aom_highbd_masked_variance32x16, - aom_highbd_masked_sub_pixel_variance32x16) + aom_highbd_8_masked_sub_pixel_variance32x16) HIGHBD_MBFP(BLOCK_16X32, aom_highbd_masked_sad16x32_bits8, - aom_highbd_masked_variance16x32, - aom_highbd_masked_sub_pixel_variance16x32) + aom_highbd_8_masked_sub_pixel_variance16x32) HIGHBD_MBFP(BLOCK_16X16, aom_highbd_masked_sad16x16_bits8, - aom_highbd_masked_variance16x16, - aom_highbd_masked_sub_pixel_variance16x16) + aom_highbd_8_masked_sub_pixel_variance16x16) HIGHBD_MBFP(BLOCK_8X16, aom_highbd_masked_sad8x16_bits8, - aom_highbd_masked_variance8x16, - aom_highbd_masked_sub_pixel_variance8x16) + aom_highbd_8_masked_sub_pixel_variance8x16) HIGHBD_MBFP(BLOCK_16X8, aom_highbd_masked_sad16x8_bits8, - aom_highbd_masked_variance16x8, - aom_highbd_masked_sub_pixel_variance16x8) + aom_highbd_8_masked_sub_pixel_variance16x8) HIGHBD_MBFP(BLOCK_8X8, aom_highbd_masked_sad8x8_bits8, - aom_highbd_masked_variance8x8, - aom_highbd_masked_sub_pixel_variance8x8) + aom_highbd_8_masked_sub_pixel_variance8x8) HIGHBD_MBFP(BLOCK_4X8, aom_highbd_masked_sad4x8_bits8, - aom_highbd_masked_variance4x8, - aom_highbd_masked_sub_pixel_variance4x8) + aom_highbd_8_masked_sub_pixel_variance4x8) HIGHBD_MBFP(BLOCK_8X4, aom_highbd_masked_sad8x4_bits8, - aom_highbd_masked_variance8x4, - aom_highbd_masked_sub_pixel_variance8x4) + aom_highbd_8_masked_sub_pixel_variance8x4) HIGHBD_MBFP(BLOCK_4X4, aom_highbd_masked_sad4x4_bits8, - aom_highbd_masked_variance4x4, - aom_highbd_masked_sub_pixel_variance4x4) + aom_highbd_8_masked_sub_pixel_variance4x4) #endif // CONFIG_EXT_INTER #if CONFIG_MOTION_VAR #if CONFIG_EXT_PARTITION @@ -1624,53 +1595,37 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) { #if CONFIG_EXT_INTER #if CONFIG_EXT_PARTITION HIGHBD_MBFP(BLOCK_128X128, aom_highbd_masked_sad128x128_bits10, - aom_highbd_10_masked_variance128x128, aom_highbd_10_masked_sub_pixel_variance128x128) HIGHBD_MBFP(BLOCK_128X64, aom_highbd_masked_sad128x64_bits10, - aom_highbd_10_masked_variance128x64, aom_highbd_10_masked_sub_pixel_variance128x64) HIGHBD_MBFP(BLOCK_64X128, aom_highbd_masked_sad64x128_bits10, - aom_highbd_10_masked_variance64x128, aom_highbd_10_masked_sub_pixel_variance64x128) #endif // CONFIG_EXT_PARTITION HIGHBD_MBFP(BLOCK_64X64, aom_highbd_masked_sad64x64_bits10, - aom_highbd_10_masked_variance64x64, aom_highbd_10_masked_sub_pixel_variance64x64) HIGHBD_MBFP(BLOCK_64X32, aom_highbd_masked_sad64x32_bits10, - aom_highbd_10_masked_variance64x32, aom_highbd_10_masked_sub_pixel_variance64x32) HIGHBD_MBFP(BLOCK_32X64, aom_highbd_masked_sad32x64_bits10, - aom_highbd_10_masked_variance32x64, aom_highbd_10_masked_sub_pixel_variance32x64) HIGHBD_MBFP(BLOCK_32X32, aom_highbd_masked_sad32x32_bits10, - aom_highbd_10_masked_variance32x32, aom_highbd_10_masked_sub_pixel_variance32x32) HIGHBD_MBFP(BLOCK_32X16, aom_highbd_masked_sad32x16_bits10, - aom_highbd_10_masked_variance32x16, aom_highbd_10_masked_sub_pixel_variance32x16) HIGHBD_MBFP(BLOCK_16X32, aom_highbd_masked_sad16x32_bits10, - aom_highbd_10_masked_variance16x32, aom_highbd_10_masked_sub_pixel_variance16x32) HIGHBD_MBFP(BLOCK_16X16, aom_highbd_masked_sad16x16_bits10, - aom_highbd_10_masked_variance16x16, aom_highbd_10_masked_sub_pixel_variance16x16) HIGHBD_MBFP(BLOCK_8X16, aom_highbd_masked_sad8x16_bits10, - aom_highbd_10_masked_variance8x16, aom_highbd_10_masked_sub_pixel_variance8x16) HIGHBD_MBFP(BLOCK_16X8, aom_highbd_masked_sad16x8_bits10, - aom_highbd_10_masked_variance16x8, aom_highbd_10_masked_sub_pixel_variance16x8) HIGHBD_MBFP(BLOCK_8X8, 
aom_highbd_masked_sad8x8_bits10, - aom_highbd_10_masked_variance8x8, aom_highbd_10_masked_sub_pixel_variance8x8) HIGHBD_MBFP(BLOCK_4X8, aom_highbd_masked_sad4x8_bits10, - aom_highbd_10_masked_variance4x8, aom_highbd_10_masked_sub_pixel_variance4x8) HIGHBD_MBFP(BLOCK_8X4, aom_highbd_masked_sad8x4_bits10, - aom_highbd_10_masked_variance8x4, aom_highbd_10_masked_sub_pixel_variance8x4) HIGHBD_MBFP(BLOCK_4X4, aom_highbd_masked_sad4x4_bits10, - aom_highbd_10_masked_variance4x4, aom_highbd_10_masked_sub_pixel_variance4x4) #endif // CONFIG_EXT_INTER #if CONFIG_MOTION_VAR @@ -1847,53 +1802,37 @@ static void highbd_set_var_fns(AV1_COMP *const cpi) { #if CONFIG_EXT_INTER #if CONFIG_EXT_PARTITION HIGHBD_MBFP(BLOCK_128X128, aom_highbd_masked_sad128x128_bits12, - aom_highbd_12_masked_variance128x128, aom_highbd_12_masked_sub_pixel_variance128x128) HIGHBD_MBFP(BLOCK_128X64, aom_highbd_masked_sad128x64_bits12, - aom_highbd_12_masked_variance128x64, aom_highbd_12_masked_sub_pixel_variance128x64) HIGHBD_MBFP(BLOCK_64X128, aom_highbd_masked_sad64x128_bits12, - aom_highbd_12_masked_variance64x128, aom_highbd_12_masked_sub_pixel_variance64x128) #endif // CONFIG_EXT_PARTITION HIGHBD_MBFP(BLOCK_64X64, aom_highbd_masked_sad64x64_bits12, - aom_highbd_12_masked_variance64x64, aom_highbd_12_masked_sub_pixel_variance64x64) HIGHBD_MBFP(BLOCK_64X32, aom_highbd_masked_sad64x32_bits12, - aom_highbd_12_masked_variance64x32, aom_highbd_12_masked_sub_pixel_variance64x32) HIGHBD_MBFP(BLOCK_32X64, aom_highbd_masked_sad32x64_bits12, - aom_highbd_12_masked_variance32x64, aom_highbd_12_masked_sub_pixel_variance32x64) HIGHBD_MBFP(BLOCK_32X32, aom_highbd_masked_sad32x32_bits12, - aom_highbd_12_masked_variance32x32, aom_highbd_12_masked_sub_pixel_variance32x32) HIGHBD_MBFP(BLOCK_32X16, aom_highbd_masked_sad32x16_bits12, - aom_highbd_12_masked_variance32x16, aom_highbd_12_masked_sub_pixel_variance32x16) HIGHBD_MBFP(BLOCK_16X32, aom_highbd_masked_sad16x32_bits12, - aom_highbd_12_masked_variance16x32, aom_highbd_12_masked_sub_pixel_variance16x32) HIGHBD_MBFP(BLOCK_16X16, aom_highbd_masked_sad16x16_bits12, - aom_highbd_12_masked_variance16x16, aom_highbd_12_masked_sub_pixel_variance16x16) HIGHBD_MBFP(BLOCK_8X16, aom_highbd_masked_sad8x16_bits12, - aom_highbd_12_masked_variance8x16, aom_highbd_12_masked_sub_pixel_variance8x16) HIGHBD_MBFP(BLOCK_16X8, aom_highbd_masked_sad16x8_bits12, - aom_highbd_12_masked_variance16x8, aom_highbd_12_masked_sub_pixel_variance16x8) HIGHBD_MBFP(BLOCK_8X8, aom_highbd_masked_sad8x8_bits12, - aom_highbd_12_masked_variance8x8, aom_highbd_12_masked_sub_pixel_variance8x8) HIGHBD_MBFP(BLOCK_4X8, aom_highbd_masked_sad4x8_bits12, - aom_highbd_12_masked_variance4x8, aom_highbd_12_masked_sub_pixel_variance4x8) HIGHBD_MBFP(BLOCK_8X4, aom_highbd_masked_sad8x4_bits12, - aom_highbd_12_masked_variance8x4, aom_highbd_12_masked_sub_pixel_variance8x4) HIGHBD_MBFP(BLOCK_4X4, aom_highbd_masked_sad4x4_bits12, - aom_highbd_12_masked_variance4x4, aom_highbd_12_masked_sub_pixel_variance4x4) #endif // CONFIG_EXT_INTER @@ -1979,6 +1918,18 @@ static void realloc_segmentation_maps(AV1_COMP *cpi) { aom_calloc(cm->mi_rows * cm->mi_cols, 1)); } +#if CONFIG_EXT_INTER +void set_compound_tools(AV1_COMMON *cm) { + (void)cm; +#if CONFIG_INTERINTRA + cm->allow_interintra_compound = 1; +#endif // CONFIG_INTERINTRA +#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT + cm->allow_masked_compound = 1; +#endif // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT +} +#endif // CONFIG_EXT_INTER + void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig 
*oxcf) { AV1_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; @@ -1994,9 +1945,7 @@ void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) { assert(cm->bit_depth > AOM_BITS_8); cpi->oxcf = *oxcf; -#if CONFIG_HIGHBITDEPTH cpi->td.mb.e_mbd.bd = (int)cm->bit_depth; -#endif // CONFIG_HIGHBITDEPTH #if CONFIG_GLOBAL_MOTION cpi->td.mb.e_mbd.global_motion = cm->global_motion; #endif // CONFIG_GLOBAL_MOTION @@ -2033,7 +1982,9 @@ void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) { av1_setup_pc_tree(&cpi->common, &cpi->td); } #endif // CONFIG_PALETTE - +#if CONFIG_EXT_INTER + set_compound_tools(cm); +#endif // CONFIG_EXT_INTER av1_reset_segment_features(cm); av1_set_high_precision_mv(cpi, 0); @@ -2107,50 +2058,6 @@ void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) { #endif // CONFIG_ANS && ANS_MAX_SYMBOLS } -#ifndef M_LOG2_E -#define M_LOG2_E 0.693147180559945309417 -#endif -#define log2f(x) (log(x) / (float)M_LOG2_E) - -#if !CONFIG_REF_MV -static void cal_nmvjointsadcost(int *mvjointsadcost) { - mvjointsadcost[0] = 600; - mvjointsadcost[1] = 300; - mvjointsadcost[2] = 300; - mvjointsadcost[3] = 300; -} -#endif - -static void cal_nmvsadcosts(int *mvsadcost[2]) { - int i = 1; - - mvsadcost[0][0] = 0; - mvsadcost[1][0] = 0; - - do { - double z = 256 * (2 * (log2f(8 * i) + .6)); - mvsadcost[0][i] = (int)z; - mvsadcost[1][i] = (int)z; - mvsadcost[0][-i] = (int)z; - mvsadcost[1][-i] = (int)z; - } while (++i <= MV_MAX); -} - -static void cal_nmvsadcosts_hp(int *mvsadcost[2]) { - int i = 1; - - mvsadcost[0][0] = 0; - mvsadcost[1][0] = 0; - - do { - double z = 256 * (2 * (log2f(8 * i) + .6)); - mvsadcost[0][i] = (int)z; - mvsadcost[1][i] = (int)z; - mvsadcost[0][-i] = (int)z; - mvsadcost[1][-i] = (int)z; - } while (++i <= MV_MAX); -} - static INLINE void init_upsampled_ref_frame_bufs(AV1_COMP *cpi) { int i; @@ -2192,6 +2099,11 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf, cpi->resize_state = 0; cpi->resize_avg_qp = 0; cpi->resize_buffer_underflow = 0; + cpi->resize_scale_num = 16; + cpi->resize_scale_den = 16; + cpi->resize_next_scale_num = 16; + cpi->resize_next_scale_den = 16; + cpi->common.buffer_pool = pool; init_config(cpi, oxcf); @@ -2223,17 +2135,10 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf, realloc_segmentation_maps(cpi); -#if CONFIG_REF_MV for (i = 0; i < NMV_CONTEXTS; ++i) { memset(cpi->nmv_costs, 0, sizeof(cpi->nmv_costs)); memset(cpi->nmv_costs_hp, 0, sizeof(cpi->nmv_costs_hp)); } -#endif - - memset(cpi->nmvcosts, 0, sizeof(cpi->nmvcosts)); - memset(cpi->nmvcosts_hp, 0, sizeof(cpi->nmvcosts_hp)); - memset(cpi->nmvsadcosts, 0, sizeof(cpi->nmvsadcosts)); - memset(cpi->nmvsadcosts_hp, 0, sizeof(cpi->nmvsadcosts_hp)); for (i = 0; i < (sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0])); i++) { @@ -2296,27 +2201,12 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf, cpi->first_time_stamp_ever = INT64_MAX; -#if CONFIG_REF_MV for (i = 0; i < NMV_CONTEXTS; ++i) { cpi->td.mb.nmvcost[i][0] = &cpi->nmv_costs[i][0][MV_MAX]; cpi->td.mb.nmvcost[i][1] = &cpi->nmv_costs[i][1][MV_MAX]; cpi->td.mb.nmvcost_hp[i][0] = &cpi->nmv_costs_hp[i][0][MV_MAX]; cpi->td.mb.nmvcost_hp[i][1] = &cpi->nmv_costs_hp[i][1][MV_MAX]; } -#else - cal_nmvjointsadcost(cpi->td.mb.nmvjointsadcost); - cpi->td.mb.nmvcost[0] = &cpi->nmvcosts[0][MV_MAX]; - cpi->td.mb.nmvcost[1] = &cpi->nmvcosts[1][MV_MAX]; - cpi->td.mb.nmvcost_hp[0] = &cpi->nmvcosts_hp[0][MV_MAX]; - cpi->td.mb.nmvcost_hp[1] = 
&cpi->nmvcosts_hp[1][MV_MAX]; -#endif - cpi->td.mb.nmvsadcost[0] = &cpi->nmvsadcosts[0][MV_MAX]; - cpi->td.mb.nmvsadcost[1] = &cpi->nmvsadcosts[1][MV_MAX]; - cal_nmvsadcosts(cpi->td.mb.nmvsadcost); - - cpi->td.mb.nmvsadcost_hp[0] = &cpi->nmvsadcosts_hp[0][MV_MAX]; - cpi->td.mb.nmvsadcost_hp[1] = &cpi->nmvsadcosts_hp[1][MV_MAX]; - cal_nmvsadcosts_hp(cpi->td.mb.nmvsadcost_hp); #ifdef OUTPUT_YUV_SKINMAP yuv_skinmap_file = fopen("skinmap.yuv", "ab"); @@ -2363,17 +2253,36 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf, } #endif +#if CONFIG_MOTION_VAR +#if CONFIG_HIGHBITDEPTH + int buf_scaler = 2; +#else + int buf_scaler = 1; +#endif + CHECK_MEM_ERROR( + cm, cpi->td.mb.above_pred_buf, + (uint8_t *)aom_memalign(16, buf_scaler * MAX_MB_PLANE * MAX_SB_SQUARE * + sizeof(*cpi->td.mb.above_pred_buf))); + CHECK_MEM_ERROR( + cm, cpi->td.mb.left_pred_buf, + (uint8_t *)aom_memalign(16, buf_scaler * MAX_MB_PLANE * MAX_SB_SQUARE * + sizeof(*cpi->td.mb.left_pred_buf))); + + CHECK_MEM_ERROR(cm, cpi->td.mb.wsrc_buf, + (int32_t *)aom_memalign( + 16, MAX_SB_SQUARE * sizeof(*cpi->td.mb.wsrc_buf))); + + CHECK_MEM_ERROR(cm, cpi->td.mb.mask_buf, + (int32_t *)aom_memalign( + 16, MAX_SB_SQUARE * sizeof(*cpi->td.mb.mask_buf))); + +#endif + init_upsampled_ref_frame_bufs(cpi); av1_set_speed_features_framesize_independent(cpi); av1_set_speed_features_framesize_dependent(cpi); - // Allocate memory to store variances for a frame. - CHECK_MEM_ERROR(cm, cpi->source_diff_var, - aom_calloc(cm->MBs, sizeof(*cpi->source_diff_var))); - cpi->source_var_thresh = 0; - cpi->frames_till_next_var_check = 0; - #define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX3F, SDX8F, SDX4DF) \ cpi->fn_ptr[BT].sdf = SDF; \ cpi->fn_ptr[BT].sdaf = SDAF; \ @@ -2499,45 +2408,29 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf, #endif // CONFIG_MOTION_VAR #if CONFIG_EXT_INTER -#define MBFP(BT, MSDF, MVF, MSVF) \ - cpi->fn_ptr[BT].msdf = MSDF; \ - cpi->fn_ptr[BT].mvf = MVF; \ - cpi->fn_ptr[BT].msvf = MSVF; +#define MBFP(BT, MCSDF, MCSVF) \ + cpi->fn_ptr[BT].msdf = MCSDF; \ + cpi->fn_ptr[BT].msvf = MCSVF; #if CONFIG_EXT_PARTITION - MBFP(BLOCK_128X128, aom_masked_sad128x128, aom_masked_variance128x128, + MBFP(BLOCK_128X128, aom_masked_sad128x128, aom_masked_sub_pixel_variance128x128) - MBFP(BLOCK_128X64, aom_masked_sad128x64, aom_masked_variance128x64, - aom_masked_sub_pixel_variance128x64) - MBFP(BLOCK_64X128, aom_masked_sad64x128, aom_masked_variance64x128, - aom_masked_sub_pixel_variance64x128) + MBFP(BLOCK_128X64, aom_masked_sad128x64, aom_masked_sub_pixel_variance128x64) + MBFP(BLOCK_64X128, aom_masked_sad64x128, aom_masked_sub_pixel_variance64x128) #endif // CONFIG_EXT_PARTITION - MBFP(BLOCK_64X64, aom_masked_sad64x64, aom_masked_variance64x64, - aom_masked_sub_pixel_variance64x64) - MBFP(BLOCK_64X32, aom_masked_sad64x32, aom_masked_variance64x32, - aom_masked_sub_pixel_variance64x32) - MBFP(BLOCK_32X64, aom_masked_sad32x64, aom_masked_variance32x64, - aom_masked_sub_pixel_variance32x64) - MBFP(BLOCK_32X32, aom_masked_sad32x32, aom_masked_variance32x32, - aom_masked_sub_pixel_variance32x32) - MBFP(BLOCK_32X16, aom_masked_sad32x16, aom_masked_variance32x16, - aom_masked_sub_pixel_variance32x16) - MBFP(BLOCK_16X32, aom_masked_sad16x32, aom_masked_variance16x32, - aom_masked_sub_pixel_variance16x32) - MBFP(BLOCK_16X16, aom_masked_sad16x16, aom_masked_variance16x16, - aom_masked_sub_pixel_variance16x16) - MBFP(BLOCK_16X8, aom_masked_sad16x8, aom_masked_variance16x8, - aom_masked_sub_pixel_variance16x8) - MBFP(BLOCK_8X16, aom_masked_sad8x16, 
aom_masked_variance8x16, - aom_masked_sub_pixel_variance8x16) - MBFP(BLOCK_8X8, aom_masked_sad8x8, aom_masked_variance8x8, - aom_masked_sub_pixel_variance8x8) - MBFP(BLOCK_4X8, aom_masked_sad4x8, aom_masked_variance4x8, - aom_masked_sub_pixel_variance4x8) - MBFP(BLOCK_8X4, aom_masked_sad8x4, aom_masked_variance8x4, - aom_masked_sub_pixel_variance8x4) - MBFP(BLOCK_4X4, aom_masked_sad4x4, aom_masked_variance4x4, - aom_masked_sub_pixel_variance4x4) + MBFP(BLOCK_64X64, aom_masked_sad64x64, aom_masked_sub_pixel_variance64x64) + MBFP(BLOCK_64X32, aom_masked_sad64x32, aom_masked_sub_pixel_variance64x32) + MBFP(BLOCK_32X64, aom_masked_sad32x64, aom_masked_sub_pixel_variance32x64) + MBFP(BLOCK_32X32, aom_masked_sad32x32, aom_masked_sub_pixel_variance32x32) + MBFP(BLOCK_32X16, aom_masked_sad32x16, aom_masked_sub_pixel_variance32x16) + MBFP(BLOCK_16X32, aom_masked_sad16x32, aom_masked_sub_pixel_variance16x32) + MBFP(BLOCK_16X16, aom_masked_sad16x16, aom_masked_sub_pixel_variance16x16) + MBFP(BLOCK_16X8, aom_masked_sad16x8, aom_masked_sub_pixel_variance16x8) + MBFP(BLOCK_8X16, aom_masked_sad8x16, aom_masked_sub_pixel_variance8x16) + MBFP(BLOCK_8X8, aom_masked_sad8x8, aom_masked_sub_pixel_variance8x8) + MBFP(BLOCK_4X8, aom_masked_sad4x8, aom_masked_sub_pixel_variance4x8) + MBFP(BLOCK_8X4, aom_masked_sad8x4, aom_masked_sub_pixel_variance8x4) + MBFP(BLOCK_4X4, aom_masked_sad4x4, aom_masked_sub_pixel_variance4x4) #endif // CONFIG_EXT_INTER #if CONFIG_HIGHBITDEPTH @@ -2555,6 +2448,9 @@ AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf, #endif av1_loop_filter_init(cm); +#if CONFIG_FRAME_SUPERRES + cm->superres_scale_numerator = SUPERRES_SCALE_DENOMINATOR; +#endif // CONFIG_FRAME_SUPERRES #if CONFIG_LOOP_RESTORATION av1_loop_restoration_precal(); #endif // CONFIG_LOOP_RESTORATION @@ -2671,11 +2567,16 @@ void av1_remove_compressor(AV1_COMP *cpi) { if (t < cpi->num_workers - 1) { #if CONFIG_PALETTE if (cpi->common.allow_screen_content_tools) - aom_free(thread_data->td->mb.palette_buffer); + aom_free(thread_data->td->palette_buffer); #endif // CONFIG_PALETTE +#if CONFIG_MOTION_VAR + aom_free(thread_data->td->above_pred_buf); + aom_free(thread_data->td->left_pred_buf); + aom_free(thread_data->td->wsrc_buf); + aom_free(thread_data->td->mask_buf); +#endif // CONFIG_MOTION_VAR aom_free(thread_data->td->counts); av1_free_pc_tree(thread_data->td); - av1_free_var_tree(thread_data->td); aom_free(thread_data->td); } } @@ -2935,48 +2836,6 @@ void aom_write_one_yuv_frame(AV1_COMMON *cm, YV12_BUFFER_CONFIG *s) { #endif // OUTPUT_YUV_REC #if CONFIG_HIGHBITDEPTH -static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst, - int bd) { -#else -static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst) { -#endif // CONFIG_HIGHBITDEPTH - // TODO(dkovalev): replace YV12_BUFFER_CONFIG with aom_image_t - int i; - const uint8_t *const srcs[3] = { src->y_buffer, src->u_buffer, - src->v_buffer }; - const int src_strides[3] = { src->y_stride, src->uv_stride, src->uv_stride }; - const int src_widths[3] = { src->y_crop_width, src->uv_crop_width, - src->uv_crop_width }; - const int src_heights[3] = { src->y_crop_height, src->uv_crop_height, - src->uv_crop_height }; - uint8_t *const dsts[3] = { dst->y_buffer, dst->u_buffer, dst->v_buffer }; - const int dst_strides[3] = { dst->y_stride, dst->uv_stride, dst->uv_stride }; - const int dst_widths[3] = { dst->y_crop_width, dst->uv_crop_width, - dst->uv_crop_width }; - const int dst_heights[3] = { 
dst->y_crop_height, dst->uv_crop_height, - dst->uv_crop_height }; - - for (i = 0; i < MAX_MB_PLANE; ++i) { -#if CONFIG_HIGHBITDEPTH - if (src->flags & YV12_FLAG_HIGHBITDEPTH) { - av1_highbd_resize_plane(srcs[i], src_heights[i], src_widths[i], - src_strides[i], dsts[i], dst_heights[i], - dst_widths[i], dst_strides[i], bd); - } else { - av1_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i], - dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]); - } -#else - av1_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i], - dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]); -#endif // CONFIG_HIGHBITDEPTH - } - aom_extend_frame_borders(dst); -} - -#if CONFIG_HIGHBITDEPTH static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int planes, int bd) { @@ -3041,22 +2900,6 @@ static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src, aom_extend_frame_borders(dst); } -static int scale_down(AV1_COMP *cpi, int q) { - RATE_CONTROL *const rc = &cpi->rc; - GF_GROUP *const gf_group = &cpi->twopass.gf_group; - int scale = 0; - assert(frame_is_kf_gf_arf(cpi)); - - if (rc->frame_size_selector == UNSCALED && - q >= rc->rf_level_maxq[gf_group->rf_level[gf_group->index]]) { - const int max_size_thresh = - (int)(rate_thresh_mult[SCALE_STEP1] * - AOMMAX(rc->this_frame_target, rc->avg_frame_bandwidth)); - scale = rc->projected_frame_size > max_size_thresh ? 1 : 0; - } - return scale; -} - #if CONFIG_GLOBAL_MOTION #define GM_RECODE_LOOP_NUM4X4_FACTOR 192 static int recode_loop_test_global_motion(AV1_COMP *cpi) { @@ -3070,11 +2913,8 @@ static int recode_loop_test_global_motion(AV1_COMP *cpi) { cpi->gmparams_cost[i]) { set_default_warp_params(&cm->global_motion[i]); cpi->gmparams_cost[i] = 0; -#if CONFIG_REF_MV recode = 1; -#else recode |= (rdc->global_motion_used[i] > 0); -#endif } } return recode; @@ -3093,13 +2933,6 @@ static int recode_loop_test(AV1_COMP *cpi, int high_limit, int low_limit, int q, if ((rc->projected_frame_size >= rc->max_frame_bandwidth) || (cpi->sf.recode_loop == ALLOW_RECODE) || (frame_is_kfgfarf && (cpi->sf.recode_loop == ALLOW_RECODE_KFARFGF))) { - if (frame_is_kfgfarf && (oxcf->resize_mode == RESIZE_DYNAMIC) && - scale_down(cpi, q)) { - // Code this group at a lower resolution. - cpi->resize_pending = 1; - return 1; - } - // TODO(agrange) high_limit could be greater than the scale-down threshold. 
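The size check that immediately follows is the core of the two-sided recode test: re-encode when the projected frame size overshoots the high limit and q can still rise, or undershoots the low limit and q can still fall. A toy standalone version of that decision, with all thresholds as illustrative parameters:

#include <stdio.h>

/* Returns 1 if the frame should be re-encoded: it overshot and q can still
 * go up, or it undershot and q can still come down. */
static int recode_needed(int projected_size, int high_limit, int low_limit,
                         int q, int maxq, int minq) {
  const int overshoot = projected_size > high_limit && q < maxq;
  const int undershoot = projected_size < low_limit && q > minq;
  return overshoot || undershoot;
}

int main(void) {
  printf("%d\n", recode_needed(120000, 100000, 60000, 40, 63, 4)); /* 1 */
  printf("%d\n", recode_needed(80000, 100000, 60000, 40, 63, 4));  /* 0 */
  printf("%d\n", recode_needed(30000, 100000, 60000, 4, 63, 4));   /* 0: q at floor */
  return 0;
}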
if ((rc->projected_frame_size > high_limit && q < maxq) || (rc->projected_frame_size < low_limit && q > minq)) { @@ -3863,6 +3696,9 @@ static void set_size_independent_vars(AV1_COMP *cpi) { av1_set_rd_speed_thresholds(cpi); av1_set_rd_speed_thresholds_sub8x8(cpi); cpi->common.interp_filter = cpi->sf.default_interp_filter; +#if CONFIG_EXT_INTER + if (!frame_is_intra_only(&cpi->common)) set_compound_tools(&cpi->common); +#endif // CONFIG_EXT_INTER } static void set_size_dependent_vars(AV1_COMP *cpi, int *q, int *bottom_index, @@ -3916,43 +3752,52 @@ static void set_restoration_tilesize(int width, int height, } #endif // CONFIG_LOOP_RESTORATION -static void set_frame_size(AV1_COMP *cpi) { - int ref_frame; +static void set_scaled_size(AV1_COMP *cpi) { AV1_COMMON *const cm = &cpi->common; AV1EncoderConfig *const oxcf = &cpi->oxcf; - MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; - if (oxcf->pass == 2 && oxcf->rc_mode == AOM_VBR && - ((oxcf->resize_mode == RESIZE_FIXED && cm->current_video_frame == 0) || - (oxcf->resize_mode == RESIZE_DYNAMIC && cpi->resize_pending))) { - av1_calculate_coded_size(cpi, &oxcf->scaled_frame_width, - &oxcf->scaled_frame_height); - - // There has been a change in frame size. - av1_set_size_literal(cpi, oxcf->scaled_frame_width, - oxcf->scaled_frame_height); + // TODO(afergs): Replace with call to av1_resize_pending? Could replace + // scaled_size_set as well. + // TODO(afergs): Realistically, if resize_pending is true, then the other + // conditions must already be satisfied. + // Try this first: + // av1_resize_pending && + // (DYNAMIC && (1 Pass CBR || 2 Pass VBR) + // STATIC && FIRST_FRAME) + // Really, av1_resize_pending should just reflect the above. + // TODO(afergs): Allow fixed resizing in AOM_CBR mode? + // 2 Pass VBR: Resize if fixed resize and first frame, or dynamic resize and + // a resize is pending. + // 1 Pass CBR: Resize if dynamic resize and resize pending. + if ((oxcf->pass == 2 && oxcf->rc_mode == AOM_VBR && + ((oxcf->resize_mode == RESIZE_FIXED && cm->current_video_frame == 0) || + (oxcf->resize_mode == RESIZE_DYNAMIC && av1_resize_pending(cpi)))) || + (oxcf->pass == 0 && oxcf->rc_mode == AOM_CBR && + oxcf->resize_mode == RESIZE_DYNAMIC && av1_resize_pending(cpi))) { + // TODO(afergs): This feels hacky... Should it just set? Should + // av1_set_next_scaled_size be a library function? + av1_calculate_next_scaled_size(cpi, &oxcf->scaled_frame_width, + &oxcf->scaled_frame_height); } +} - if (oxcf->pass == 0 && oxcf->rc_mode == AOM_CBR && - oxcf->resize_mode == RESIZE_DYNAMIC) { - if (cpi->resize_pending == 1) { - oxcf->scaled_frame_width = - (cm->width * cpi->resize_scale_num) / cpi->resize_scale_den; - oxcf->scaled_frame_height = - (cm->height * cpi->resize_scale_num) / cpi->resize_scale_den; - } else if (cpi->resize_pending == -1) { - // Go back up to original size. - oxcf->scaled_frame_width = oxcf->width; - oxcf->scaled_frame_height = oxcf->height; - } - if (cpi->resize_pending != 0) { - // There has been a change in frame size. - av1_set_size_literal(cpi, oxcf->scaled_frame_width, - oxcf->scaled_frame_height); +static void set_frame_size(AV1_COMP *cpi, int width, int height) { + int ref_frame; + AV1_COMMON *const cm = &cpi->common; + AV1EncoderConfig *const oxcf = &cpi->oxcf; + MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; - // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed. 
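Dynamic resizing in this file works from a rational scale factor (resize_scale_num over resize_scale_den, with 16/16 meaning unscaled), as in the removed width * resize_scale_num / resize_scale_den computation above. A minimal sketch of deriving a coded dimension from that ratio; the round-to-nearest behaviour and the even/minimum-size clamp are assumptions for illustration, not the exact rules of av1_calculate_next_scaled_size():

#include <stdio.h>

/* Scale a dimension by num/den with rounding to nearest, keeping it even
 * and at least 2 so chroma subsampling still works. */
static int scale_dim(int dim, int num, int den) {
  int scaled = (dim * num + den / 2) / den;
  scaled &= ~1; /* keep it even */
  return scaled < 2 ? 2 : scaled;
}

int main(void) {
  const int num = 12, den = 16; /* 3/4 downscale */
  printf("1920x1080 -> %dx%d\n", scale_dim(1920, num, den),
         scale_dim(1080, num, den));
  return 0;
}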
- set_mv_search_params(cpi); - } + if (width != cm->width || height != cm->height) { + // There has been a change in the encoded frame size + av1_set_size_literal(cpi, width, height); + + // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed. + // TODO(afergs): Make condition just (pass == 0) or (rc_mode == CBR) - + // UNLESS CBR starts allowing FIXED resizing. Then the resize + // mode will need to get checked too. + if (oxcf->pass == 0 && oxcf->rc_mode == AOM_CBR && + oxcf->resize_mode == RESIZE_DYNAMIC) + set_mv_search_params(cpi); // TODO(afergs): Needed? Caller calls after... } #if !CONFIG_XIPHRC @@ -4012,10 +3857,33 @@ static void set_frame_size(AV1_COMP *cpi) { ref_buf->buf = NULL; } } +#if CONFIG_INTRABC +#if CONFIG_HIGHBITDEPTH + av1_setup_scale_factors_for_frame(&xd->sf_identity, cm->width, cm->height, + cm->width, cm->height, + cm->use_highbitdepth); +#else + av1_setup_scale_factors_for_frame(&xd->sf_identity, cm->width, cm->height, + cm->width, cm->height); +#endif // CONFIG_HIGHBITDEPTH +#endif // CONFIG_INTRABC set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME); } +static void setup_frame_size(AV1_COMP *cpi) { + set_scaled_size(cpi); +#if CONFIG_FRAME_SUPERRES + int encode_width; + int encode_height; + av1_calculate_superres_size(cpi, &encode_width, &encode_height); + set_frame_size(cpi, encode_width, encode_height); +#else + set_frame_size(cpi, cpi->oxcf.scaled_frame_width, + cpi->oxcf.scaled_frame_height); +#endif // CONFIG_FRAME_SUPERRES +} + static void reset_use_upsampled_references(AV1_COMP *cpi) { MV_REFERENCE_FRAME ref_frame; @@ -4039,7 +3907,15 @@ static void encode_without_recode_loop(AV1_COMP *cpi) { aom_clear_system_state(); - set_frame_size(cpi); +#if CONFIG_FRAME_SUPERRES + // TODO(afergs): Figure out when is actually a good time to do superres + cm->superres_scale_numerator = SUPERRES_SCALE_DENOMINATOR; + // (uint8_t)(rand() % 9 + SUPERRES_SCALE_NUMERATOR_MIN); + cpi->superres_pending = cpi->oxcf.superres_enabled && 0; +#endif // CONFIG_FRAME_SUPERRES + + setup_frame_size(cpi); + av1_resize_step(cpi); // For 1 pass CBR under dynamic resize mode: use faster scaling for source. // Only for 2x2 scaling for now. @@ -4075,19 +3951,9 @@ static void encode_without_recode_loop(AV1_COMP *cpi) { reset_use_upsampled_references(cpi); av1_set_quantizer(cm, q); - av1_set_variance_partition_thresholds(cpi, q); - setup_frame(cpi); - -#if CONFIG_SUBFRAME_PROB_UPDATE - cm->do_subframe_update = cm->tile_cols == 1 && cm->tile_rows == 1; - av1_copy(cm->starting_coef_probs, cm->fc->coef_probs); - av1_copy(cpi->subframe_stats.enc_starting_coef_probs, cm->fc->coef_probs); - cm->coef_probs_update_idx = 0; - av1_copy(cpi->subframe_stats.coef_probs_buf[0], cm->fc->coef_probs); -#endif // CONFIG_SUBFRAME_PROB_UPDATE - suppress_active_map(cpi); + // Variance adaptive and in frame q adjustment experiments are mutually // exclusive. if (cpi->oxcf.aq_mode == VARIANCE_AQ) { @@ -4102,6 +3968,11 @@ static void encode_without_recode_loop(AV1_COMP *cpi) { // transform / motion compensation build reconstruction frame av1_encode_frame(cpi); +#if CONFIG_FRAME_SUPERRES + // TODO(afergs): Upscale the frame to show + cpi->superres_pending = 0; +#endif // CONFIG_FRAME_SUPERRES + // Update some stats from cyclic refresh, and check if we should not update // golden reference, for 1 pass CBR. 
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->frame_type != KEY_FRAME && @@ -4136,9 +4007,13 @@ static void encode_with_recode_loop(AV1_COMP *cpi, size_t *size, do { aom_clear_system_state(); - set_frame_size(cpi); + setup_frame_size(cpi); - if (loop_count == 0 || cpi->resize_pending != 0) { +#if CONFIG_FRAME_SUPERRES + if (loop_count == 0 || av1_resize_pending(cpi) || cpi->superres_pending) { +#else + if (loop_count == 0 || av1_resize_pending(cpi)) { +#endif // CONFIG_FRAME_SUPERRES set_size_dependent_vars(cpi, &q, &bottom_index, &top_index); // cpi->sf.use_upsampled_references can be different from frame to frame. @@ -4159,8 +4034,8 @@ static void encode_with_recode_loop(AV1_COMP *cpi, size_t *size, undershoot_seen = 0; #endif - // Reconfiguration for change in frame size has concluded. - cpi->resize_pending = 0; + // Advance resize to next state now that updates are done + av1_resize_step(cpi); q_low = bottom_index; q_high = top_index; @@ -4208,26 +4083,6 @@ static void encode_with_recode_loop(AV1_COMP *cpi, size_t *size, } #endif // CONFIG_Q_ADAPT_PROBS -#if CONFIG_SUBFRAME_PROB_UPDATE - cm->do_subframe_update = cm->tile_cols == 1 && cm->tile_rows == 1; - if (loop_count == 0 || frame_is_intra_only(cm) || - cm->error_resilient_mode) { - av1_copy(cm->starting_coef_probs, cm->fc->coef_probs); - av1_copy(cpi->subframe_stats.enc_starting_coef_probs, cm->fc->coef_probs); - } else { - if (cm->do_subframe_update) { - av1_copy(cm->fc->coef_probs, - cpi->subframe_stats.enc_starting_coef_probs); - av1_copy(cm->starting_coef_probs, - cpi->subframe_stats.enc_starting_coef_probs); - av1_zero(cpi->subframe_stats.coef_counts_buf); - av1_zero(cpi->subframe_stats.eob_counts_buf); - } - } - cm->coef_probs_update_idx = 0; - av1_copy(cpi->subframe_stats.coef_probs_buf[0], cm->fc->coef_probs); -#endif // CONFIG_SUBFRAME_PROB_UPDATE - // Variance adaptive and in frame q adjustment experiments are mutually // exclusive. if (cpi->oxcf.aq_mode == VARIANCE_AQ) { @@ -4318,23 +4173,9 @@ static void encode_with_recode_loop(AV1_COMP *cpi, size_t *size, int last_q = q; #if !CONFIG_XIPHRC int retries = 0; -#endif - if (cpi->resize_pending == 1) { - // Change in frame size so go back around the recode loop. - cpi->rc.frame_size_selector = - SCALE_STEP1 - cpi->rc.frame_size_selector; - cpi->rc.next_frame_size_selector = cpi->rc.frame_size_selector; + // TODO(afergs): Replace removed recode when av1_resize_pending is true -#if CONFIG_INTERNAL_STATS - ++cpi->tot_recode_hits; -#endif - ++loop_count; - loop = 1; - continue; - } - -#if !CONFIG_XIPHRC // Frame size out of permitted range: // Update correction factor & compute new Q to try... // Frame is too large @@ -4438,7 +4279,7 @@ static int get_ref_frame_flags(const AV1_COMP *cpi) { const int last3_is_last = map[cpi->lst_fb_idxes[2]] == map[cpi->lst_fb_idxes[0]]; const int gld_is_last = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idxes[0]]; -#if CONFIG_LOWDELAY_COMPOUND +#if CONFIG_ONE_SIDED_COMPOUND const int alt_is_last = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idxes[0]]; const int last3_is_last2 = map[cpi->lst_fb_idxes[2]] == map[cpi->lst_fb_idxes[1]]; @@ -4491,7 +4332,7 @@ static int get_ref_frame_flags(const AV1_COMP *cpi) { if (gld_is_last2 || gld_is_last3) flags &= ~AOM_GOLD_FLAG; -#if CONFIG_LOWDELAY_COMPOUND // Changes LL & HL bitstream +#if CONFIG_ONE_SIDED_COMPOUND // Changes LL & HL bitstream /* Allow biprediction between two identical frames (e.g. 
bwd_is_last = 1) */ if (bwd_is_alt && (flags & AOM_BWD_FLAG)) flags &= ~AOM_BWD_FLAG; #else @@ -4522,36 +4363,6 @@ static void set_ext_overrides(AV1_COMP *cpi) { } } -YV12_BUFFER_CONFIG *av1_scale_if_required_fast(AV1_COMMON *cm, - YV12_BUFFER_CONFIG *unscaled, - YV12_BUFFER_CONFIG *scaled) { - if (cm->mi_cols * MI_SIZE != unscaled->y_width || - cm->mi_rows * MI_SIZE != unscaled->y_height) { - // For 2x2 scaling down. - aom_scale_frame(unscaled, scaled, unscaled->y_buffer, 9, 2, 1, 2, 1, 0); - aom_extend_frame_borders(scaled); - return scaled; - } else { - return unscaled; - } -} - -YV12_BUFFER_CONFIG *av1_scale_if_required(AV1_COMMON *cm, - YV12_BUFFER_CONFIG *unscaled, - YV12_BUFFER_CONFIG *scaled) { - if (cm->mi_cols * MI_SIZE != unscaled->y_width || - cm->mi_rows * MI_SIZE != unscaled->y_height) { -#if CONFIG_HIGHBITDEPTH - scale_and_extend_frame_nonnormative(unscaled, scaled, (int)cm->bit_depth); -#else - scale_and_extend_frame_nonnormative(unscaled, scaled); -#endif // CONFIG_HIGHBITDEPTH - return scaled; - } else { - return unscaled; - } -} - static void set_arf_sign_bias(AV1_COMP *cpi) { AV1_COMMON *const cm = &cpi->common; int arf_sign_bias; @@ -5014,9 +4825,6 @@ static void encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size, av1_accumulate_frame_counts(&aggregate_fc, &cm->counts); #endif // CONFIG_ENTROPY_STATS if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) { -#if CONFIG_SUBFRAME_PROB_UPDATE - cm->partial_prob_update = 0; -#endif // CONFIG_SUBFRAME_PROB_UPDATE av1_adapt_coef_probs(cm); av1_adapt_intra_frame_probs(cm); #if CONFIG_EC_ADAPT @@ -5767,7 +5575,8 @@ int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags, #else av1_rc_get_second_pass_params(cpi); } else if (oxcf->pass == 1) { - set_frame_size(cpi); + setup_frame_size(cpi); + av1_resize_step(cpi); } #endif @@ -5900,8 +5709,7 @@ int av1_set_internal_size(AV1_COMP *cpi, AOM_SCALING horiz_mode, return 0; } -int av1_set_size_literal(AV1_COMP *cpi, unsigned int width, - unsigned int height) { +int av1_set_size_literal(AV1_COMP *cpi, int width, int height) { AV1_COMMON *cm = &cpi->common; #if CONFIG_HIGHBITDEPTH check_initial_width(cpi, cm->use_highbitdepth, 1, 1); @@ -5909,21 +5717,20 @@ int av1_set_size_literal(AV1_COMP *cpi, unsigned int width, check_initial_width(cpi, 1, 1); #endif // CONFIG_HIGHBITDEPTH - if (width) { - cm->width = width; - if (cm->width > cpi->initial_width) { - cm->width = cpi->initial_width; - printf("Warning: Desired width too large, changed to %d\n", cm->width); - } + if (width <= 0 || height <= 0) return 1; + + cm->width = width; + if (cm->width > cpi->initial_width) { + cm->width = cpi->initial_width; + printf("Warning: Desired width too large, changed to %d\n", cm->width); } - if (height) { - cm->height = height; - if (cm->height > cpi->initial_height) { - cm->height = cpi->initial_height; - printf("Warning: Desired height too large, changed to %d\n", cm->height); - } + cm->height = height; + if (cm->height > cpi->initial_height) { + cm->height = cpi->initial_height; + printf("Warning: Desired height too large, changed to %d\n", cm->height); } + assert(cm->width <= cpi->initial_width); assert(cm->height <= cpi->initial_height); diff --git a/third_party/aom/av1/encoder/encoder.h b/third_party/aom/av1/encoder/encoder.h index 4e7aef8fc..ee1257c2d 100644 --- a/third_party/aom/av1/encoder/encoder.h +++ b/third_party/aom/av1/encoder/encoder.h @@ -37,7 +37,6 @@ #include "av1/encoder/rd.h" #include "av1/encoder/speed_features.h" #include "av1/encoder/tokenize.h" -#include 
"av1/encoder/variance_tree.h" #if CONFIG_XIPHRC #include "av1/encoder/ratectrl_xiph.h" #endif @@ -54,15 +53,9 @@ extern "C" { #endif typedef struct { - int nmvjointcost[MV_JOINTS]; - int nmvcosts[2][MV_VALS]; - int nmvcosts_hp[2][MV_VALS]; - -#if CONFIG_REF_MV int nmv_vec_cost[NMV_CONTEXTS][MV_JOINTS]; int nmv_costs[NMV_CONTEXTS][2][MV_VALS]; int nmv_costs_hp[NMV_CONTEXTS][2][MV_VALS]; -#endif // 0 = Intra, Last, GF, ARF signed char last_ref_lf_deltas[TOTAL_REFS_PER_FRAME]; @@ -210,6 +203,11 @@ typedef struct AV1EncoderConfig { int scaled_frame_width; int scaled_frame_height; +#if CONFIG_FRAME_SUPERRES + // Frame Super-Resolution size scaling + int superres_enabled; +#endif // CONFIG_FRAME_SUPERRES + // Enable feature to reduce the frame quantization every x frames. int frame_periodic_boost; @@ -323,9 +321,16 @@ typedef struct ThreadData { PICK_MODE_CONTEXT *leaf_tree; PC_TREE *pc_tree; PC_TREE *pc_root[MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2 + 1]; +#if CONFIG_MOTION_VAR + int32_t *wsrc_buf; + int32_t *mask_buf; + uint8_t *above_pred_buf; + uint8_t *left_pred_buf; +#endif - VAR_TREE *var_tree; - VAR_TREE *var_root[MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2 + 1]; +#if CONFIG_PALETTE + PALETTE_BUFFER *palette_buffer; +#endif // CONFIG_PALETTE } ThreadData; struct EncWorkerData; @@ -350,16 +355,6 @@ typedef struct { YV12_BUFFER_CONFIG buf; } EncRefCntBuffer; -#if CONFIG_SUBFRAME_PROB_UPDATE -typedef struct SUBFRAME_STATS { - av1_coeff_probs_model coef_probs_buf[COEF_PROBS_BUFS][TX_SIZES][PLANE_TYPES]; - av1_coeff_count coef_counts_buf[COEF_PROBS_BUFS][TX_SIZES][PLANE_TYPES]; - unsigned int eob_counts_buf[COEF_PROBS_BUFS][TX_SIZES][PLANE_TYPES][REF_TYPES] - [COEF_BANDS][COEFF_CONTEXTS]; - av1_coeff_probs_model enc_starting_coef_probs[TX_SIZES][PLANE_TYPES]; -} SUBFRAME_STATS; -#endif // CONFIG_SUBFRAME_PROB_UPDATE - typedef struct TileBufferEnc { uint8_t *data; size_t size; @@ -369,14 +364,7 @@ typedef struct AV1_COMP { QUANTS quants; ThreadData td; MB_MODE_INFO_EXT *mbmi_ext_base; - DECLARE_ALIGNED(16, int16_t, y_dequant[QINDEX_RANGE][8]); // 8: SIMD width - DECLARE_ALIGNED(16, int16_t, uv_dequant[QINDEX_RANGE][8]); // 8: SIMD width -#if CONFIG_NEW_QUANT - DECLARE_ALIGNED(16, dequant_val_type_nuq, - y_dequant_val_nuq[QUANT_PROFILES][QINDEX_RANGE][COEF_BANDS]); - DECLARE_ALIGNED(16, dequant_val_type_nuq, - uv_dequant_val_nuq[QUANT_PROFILES][QINDEX_RANGE][COEF_BANDS]); -#endif // CONFIG_NEW_QUANT + Dequants dequants; AV1_COMMON common; AV1EncoderConfig oxcf; struct lookahead_ctx *lookahead; @@ -443,15 +431,8 @@ typedef struct AV1_COMP { CODING_CONTEXT coding_context; -#if CONFIG_REF_MV int nmv_costs[NMV_CONTEXTS][2][MV_VALS]; int nmv_costs_hp[NMV_CONTEXTS][2][MV_VALS]; -#endif - - int nmvcosts[2][MV_VALS]; - int nmvcosts_hp[2][MV_VALS]; - int nmvsadcosts[2][MV_VALS]; - int nmvsadcosts_hp[2][MV_VALS]; int64_t last_time_stamp_seen; int64_t last_end_time_stamp_seen; @@ -543,29 +524,23 @@ typedef struct AV1_COMP { // number of MBs in the current frame when the frame is // scaled. - // Store frame variance info in SOURCE_VAR_BASED_PARTITION search type. - DIFF *source_diff_var; - // The threshold used in SOURCE_VAR_BASED_PARTITION search type. 
- unsigned int source_var_thresh; - int frames_till_next_var_check; - int frame_flags; search_site_config ss_cfg; int mbmode_cost[BLOCK_SIZE_GROUPS][INTRA_MODES]; -#if CONFIG_REF_MV int newmv_mode_cost[NEWMV_MODE_CONTEXTS][2]; int zeromv_mode_cost[ZEROMV_MODE_CONTEXTS][2]; int refmv_mode_cost[REFMV_MODE_CONTEXTS][2]; int drl_mode_cost0[DRL_MODE_CONTEXTS][2]; -#endif unsigned int inter_mode_cost[INTER_MODE_CONTEXTS][INTER_MODES]; #if CONFIG_EXT_INTER unsigned int inter_compound_mode_cost[INTER_MODE_CONTEXTS] [INTER_COMPOUND_MODES]; +#if CONFIG_INTERINTRA unsigned int interintra_mode_cost[BLOCK_SIZE_GROUPS][INTERINTRA_MODES]; +#endif // CONFIG_INTERINTRA #endif // CONFIG_EXT_INTER #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION int motion_mode_cost[BLOCK_SIZES][MOTION_MODES]; @@ -625,24 +600,18 @@ typedef struct AV1_COMP { TileBufferEnc tile_buffers[MAX_TILE_ROWS][MAX_TILE_COLS]; - int resize_pending; int resize_state; int resize_scale_num; int resize_scale_den; + int resize_next_scale_num; + int resize_next_scale_den; int resize_avg_qp; int resize_buffer_underflow; int resize_count; - // VAR_BASED_PARTITION thresholds - // 0 - threshold_128x128; - // 1 - threshold_64x64; - // 2 - threshold_32x32; - // 3 - threshold_16x16; - // 4 - threshold_8x8; - int64_t vbp_thresholds[5]; - int64_t vbp_threshold_minmax; - int64_t vbp_threshold_sad; - BLOCK_SIZE vbp_bsize_min; +#if CONFIG_FRAME_SUPERRES + int superres_pending; +#endif // CONFIG_FRAME_SUPERRES // VARIANCE_AQ segment map refresh int vaq_refresh; @@ -652,12 +621,6 @@ typedef struct AV1_COMP { AVxWorker *workers; struct EncWorkerData *tile_thr_data; AV1LfSync lf_row_sync; -#if CONFIG_SUBFRAME_PROB_UPDATE - SUBFRAME_STATS subframe_stats; - // TODO(yaowu): minimize the size of count buffers - SUBFRAME_STATS wholeframe_stats; - av1_coeff_stats branch_ct_buf[COEF_PROBS_BUFS][TX_SIZES][PLANE_TYPES]; -#endif // CONFIG_SUBFRAME_PROB_UPDATE #if CONFIG_ANS struct BufAnsCoder buf_ans; #endif @@ -720,8 +683,8 @@ int av1_get_active_map(AV1_COMP *cpi, unsigned char *map, int rows, int cols); int av1_set_internal_size(AV1_COMP *cpi, AOM_SCALING horiz_mode, AOM_SCALING vert_mode); -int av1_set_size_literal(AV1_COMP *cpi, unsigned int width, - unsigned int height); +// Returns 1 if the assigned width or height was <= 0. +int av1_set_size_literal(AV1_COMP *cpi, int width, int height); int av1_get_quantizer(struct AV1_COMP *cpi); @@ -774,7 +737,7 @@ static INLINE const YV12_BUFFER_CONFIG *get_upsampled_ref( return &cpi->upsampled_ref_bufs[buf_idx].buf; } -#if CONFIG_EXT_REFS +#if CONFIG_EXT_REFS || CONFIG_TEMPMV_SIGNALING static INLINE int enc_is_ref_frame_buf(AV1_COMP *cpi, RefCntBuffer *frame_buf) { MV_REFERENCE_FRAME ref_frame; AV1_COMMON *const cm = &cpi->common; @@ -819,14 +782,6 @@ void av1_set_high_precision_mv(AV1_COMP *cpi, int allow_high_precision_mv); void av1_set_temporal_mv_prediction(AV1_COMP *cpi, int allow_tempmv_prediction); #endif -YV12_BUFFER_CONFIG *av1_scale_if_required_fast(AV1_COMMON *cm, - YV12_BUFFER_CONFIG *unscaled, - YV12_BUFFER_CONFIG *scaled); - -YV12_BUFFER_CONFIG *av1_scale_if_required(AV1_COMMON *cm, - YV12_BUFFER_CONFIG *unscaled, - YV12_BUFFER_CONFIG *scaled); - void av1_apply_encoding_flags(AV1_COMP *cpi, aom_enc_frame_flags_t flags); static INLINE int is_altref_enabled(const AV1_COMP *const cpi) { @@ -876,6 +831,25 @@ static INLINE void uref_cnt_fb(EncRefCntBuffer *ubufs, int *uidx, ubufs[new_uidx].ref_count++; } +// Returns 1 if a resize is pending and 0 otherwise. 
+static INLINE int av1_resize_pending(const struct AV1_COMP *cpi) { + return cpi->resize_scale_num != cpi->resize_next_scale_num || + cpi->resize_scale_den != cpi->resize_next_scale_den; +} + +// Returns 1 if a frame is unscaled and 0 otherwise. +static INLINE int av1_resize_unscaled(const struct AV1_COMP *cpi) { + return cpi->resize_scale_num == cpi->resize_scale_den; +} + +// Moves resizing to the next state. This is just setting the numerator and +// denominator to the next numerator and denominator, causing +// av1_resize_pending to subsequently return false. +static INLINE void av1_resize_step(struct AV1_COMP *cpi) { + cpi->resize_scale_num = cpi->resize_next_scale_num; + cpi->resize_scale_den = cpi->resize_next_scale_den; +} + #ifdef __cplusplus } // extern "C" #endif diff --git a/third_party/aom/av1/encoder/encodetxb.c b/third_party/aom/av1/encoder/encodetxb.c index 3f71a4472..731642064 100644 --- a/third_party/aom/av1/encoder/encodetxb.c +++ b/third_party/aom/av1/encoder/encodetxb.c @@ -21,6 +21,8 @@ #include "av1/encoder/subexp.h" #include "av1/encoder/tokenize.h" +#define TEST_OPTIMIZE_TXB 0 + void av1_alloc_txb_buf(AV1_COMP *cpi) { #if 0 AV1_COMMON *cm = &cpi->common; @@ -159,7 +161,7 @@ void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd, } // level is above 1. - ctx = get_level_ctx(tcoeff, scan[c], bwl); + ctx = get_br_ctx(tcoeff, scan[c], bwl); for (idx = 0; idx < COEFF_BASE_RANGE; ++idx) { if (level == (idx + 1 + NUM_BASE_LEVELS)) { aom_write(w, 1, cm->fc->coeff_lps[tx_size][plane_type][ctx]); @@ -251,6 +253,32 @@ static INLINE void get_base_ctx_set(const tran_low_t *tcoeffs, return; } +static INLINE int get_br_cost(tran_low_t abs_qc, int ctx, + const aom_prob *coeff_lps) { + const tran_low_t min_level = 1 + NUM_BASE_LEVELS; + const tran_low_t max_level = 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE; + if (abs_qc >= min_level) { + const int cost0 = av1_cost_bit(coeff_lps[ctx], 0); + const int cost1 = av1_cost_bit(coeff_lps[ctx], 1); + if (abs_qc >= max_level) + return COEFF_BASE_RANGE * cost0; + else + return (abs_qc - min_level) * cost0 + cost1; + } else { + return 0; + } +} + +static INLINE int get_base_cost(tran_low_t abs_qc, int ctx, + aom_prob (*coeff_base)[COEFF_BASE_CONTEXTS], + int base_idx) { + const int level = base_idx + 1; + if (abs_qc < level) + return 0; + else + return av1_cost_bit(coeff_base[base_idx][ctx], abs_qc == level); +} + int av1_cost_coeffs_txb(const AV1_COMP *const cpi, MACROBLOCK *x, int plane, int block, TXB_CTX *txb_ctx) { const AV1_COMMON *const cm = &cpi->common; @@ -331,7 +359,7 @@ int av1_cost_coeffs_txb(const AV1_COMP *const cpi, MACROBLOCK *x, int plane, int idx; int ctx; - ctx = get_level_ctx(qcoeff, scan[c], bwl); + ctx = get_br_ctx(qcoeff, scan[c], bwl); for (idx = 0; idx < COEFF_BASE_RANGE; ++idx) { if (level == (idx + 1 + NUM_BASE_LEVELS)) { @@ -373,12 +401,1085 @@ int av1_cost_coeffs_txb(const AV1_COMP *const cpi, MACROBLOCK *x, int plane, return cost; } -typedef struct TxbParams { - const AV1_COMP *cpi; - ThreadData *td; - int rate; -} TxbParams; +static INLINE int has_base(tran_low_t qc, int base_idx) { + const int level = base_idx + 1; + return abs(qc) >= level; +} + +static void gen_base_count_mag_arr(int (*base_count_arr)[MAX_TX_SQUARE], + int (*base_mag_arr)[2], + const tran_low_t *qcoeff, int stride, + int eob, const int16_t *scan) { + for (int c = 0; c < eob; ++c) { + const int coeff_idx = scan[c]; // raster order + if (!has_base(qcoeff[coeff_idx], 0)) continue; + const int row = coeff_idx / stride; + const int col 
= coeff_idx % stride; + int *mag = base_mag_arr[coeff_idx]; + get_mag(mag, qcoeff, stride, row, col, base_ref_offset, + BASE_CONTEXT_POSITION_NUM); + for (int i = 0; i < NUM_BASE_LEVELS; ++i) { + if (!has_base(qcoeff[coeff_idx], i)) continue; + int *count = base_count_arr[i] + coeff_idx; + *count = get_level_count(qcoeff, stride, row, col, i, base_ref_offset, + BASE_CONTEXT_POSITION_NUM); + } + } +} + +static void gen_nz_count_arr(int(*nz_count_arr), const tran_low_t *qcoeff, + int stride, int eob, + const SCAN_ORDER *scan_order) { + const int16_t *scan = scan_order->scan; + const int16_t *iscan = scan_order->iscan; + for (int c = 0; c < eob; ++c) { + const int coeff_idx = scan[c]; // raster order + const int row = coeff_idx / stride; + const int col = coeff_idx % stride; + nz_count_arr[coeff_idx] = get_nz_count(qcoeff, stride, row, col, iscan); + } +} + +static void gen_nz_ctx_arr(int (*nz_ctx_arr)[2], int(*nz_count_arr), + const tran_low_t *qcoeff, int bwl, int eob, + const SCAN_ORDER *scan_order) { + const int16_t *scan = scan_order->scan; + const int16_t *iscan = scan_order->iscan; + for (int c = 0; c < eob; ++c) { + const int coeff_idx = scan[c]; // raster order + const int count = nz_count_arr[coeff_idx]; + nz_ctx_arr[coeff_idx][0] = + get_nz_map_ctx_from_count(count, qcoeff, coeff_idx, bwl, iscan); + } +} + +static void gen_base_ctx_arr(int (*base_ctx_arr)[MAX_TX_SQUARE][2], + int (*base_count_arr)[MAX_TX_SQUARE], + int (*base_mag_arr)[2], const tran_low_t *qcoeff, + int stride, int eob, const int16_t *scan) { + (void)qcoeff; + for (int i = 0; i < NUM_BASE_LEVELS; ++i) { + for (int c = 0; c < eob; ++c) { + const int coeff_idx = scan[c]; // raster order + if (!has_base(qcoeff[coeff_idx], i)) continue; + const int row = coeff_idx / stride; + const int col = coeff_idx % stride; + const int count = base_count_arr[i][coeff_idx]; + const int *mag = base_mag_arr[coeff_idx]; + const int level = i + 1; + base_ctx_arr[i][coeff_idx][0] = + get_base_ctx_from_count_mag(row, col, count, mag[0], level); + } + } +} + +static INLINE int has_br(tran_low_t qc) { + return abs(qc) >= 1 + NUM_BASE_LEVELS; +} + +static void gen_br_count_mag_arr(int *br_count_arr, int (*br_mag_arr)[2], + const tran_low_t *qcoeff, int stride, int eob, + const int16_t *scan) { + for (int c = 0; c < eob; ++c) { + const int coeff_idx = scan[c]; // raster order + if (!has_br(qcoeff[coeff_idx])) continue; + const int row = coeff_idx / stride; + const int col = coeff_idx % stride; + int *count = br_count_arr + coeff_idx; + int *mag = br_mag_arr[coeff_idx]; + *count = get_level_count(qcoeff, stride, row, col, NUM_BASE_LEVELS, + br_ref_offset, BR_CONTEXT_POSITION_NUM); + get_mag(mag, qcoeff, stride, row, col, br_ref_offset, + BR_CONTEXT_POSITION_NUM); + } +} + +static void gen_br_ctx_arr(int (*br_ctx_arr)[2], const int *br_count_arr, + int (*br_mag_arr)[2], const tran_low_t *qcoeff, + int stride, int eob, const int16_t *scan) { + (void)qcoeff; + for (int c = 0; c < eob; ++c) { + const int coeff_idx = scan[c]; // raster order + if (!has_br(qcoeff[coeff_idx])) continue; + const int row = coeff_idx / stride; + const int col = coeff_idx % stride; + const int count = br_count_arr[coeff_idx]; + const int *mag = br_mag_arr[coeff_idx]; + br_ctx_arr[coeff_idx][0] = + get_br_ctx_from_count_mag(row, col, count, mag[0]); + } +} + +static INLINE int get_sign_bit_cost(tran_low_t qc, int coeff_idx, + const aom_prob *dc_sign_prob, + int dc_sign_ctx) { + const int sign = (qc < 0) ? 
1 : 0; + // sign bit cost + if (coeff_idx == 0) { + return av1_cost_bit(dc_sign_prob[dc_sign_ctx], sign); + } else { + return av1_cost_bit(128, sign); + } +} +static INLINE int get_golomb_cost(int abs_qc) { + if (abs_qc >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) { + // residual cost + int r = abs_qc - COEFF_BASE_RANGE - NUM_BASE_LEVELS; + int ri = r; + int length = 0; + + while (ri) { + ri >>= 1; + ++length; + } + + return av1_cost_literal(2 * length - 1); + } else { + return 0; + } +} + +// TODO(angiebird): add static once this function is called +void gen_txb_cache(TxbCache *txb_cache, TxbInfo *txb_info) { + const int16_t *scan = txb_info->scan_order->scan; + gen_nz_count_arr(txb_cache->nz_count_arr, txb_info->qcoeff, txb_info->stride, + txb_info->eob, txb_info->scan_order); + gen_nz_ctx_arr(txb_cache->nz_ctx_arr, txb_cache->nz_count_arr, + txb_info->qcoeff, txb_info->bwl, txb_info->eob, + txb_info->scan_order); + gen_base_count_mag_arr(txb_cache->base_count_arr, txb_cache->base_mag_arr, + txb_info->qcoeff, txb_info->stride, txb_info->eob, + scan); + gen_base_ctx_arr(txb_cache->base_ctx_arr, txb_cache->base_count_arr, + txb_cache->base_mag_arr, txb_info->qcoeff, txb_info->stride, + txb_info->eob, scan); + gen_br_count_mag_arr(txb_cache->br_count_arr, txb_cache->br_mag_arr, + txb_info->qcoeff, txb_info->stride, txb_info->eob, scan); + gen_br_ctx_arr(txb_cache->br_ctx_arr, txb_cache->br_count_arr, + txb_cache->br_mag_arr, txb_info->qcoeff, txb_info->stride, + txb_info->eob, scan); +} + +static INLINE aom_prob get_level_prob(int level, int coeff_idx, + const TxbCache *txb_cache, + const TxbProbs *txb_probs) { + if (level == 0) { + const int ctx = txb_cache->nz_ctx_arr[coeff_idx][0]; + return txb_probs->nz_map[ctx]; + } else if (level >= 1 && level < 1 + NUM_BASE_LEVELS) { + const int idx = level - 1; + const int ctx = txb_cache->base_ctx_arr[idx][coeff_idx][0]; + return txb_probs->coeff_base[idx][ctx]; + } else if (level >= 1 + NUM_BASE_LEVELS && + level < 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) { + const int ctx = txb_cache->br_ctx_arr[coeff_idx][0]; + return txb_probs->coeff_lps[ctx]; + } else if (level >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) { + printf("get_level_prob does not support golomb\n"); + assert(0); + return 0; + } else { + assert(0); + return 0; + } +} + +static INLINE tran_low_t get_lower_coeff(tran_low_t qc) { + if (qc == 0) { + return 0; + } + return qc > 0 ? 
qc - 1 : qc + 1; +} + +static INLINE void update_mag_arr(int *mag_arr, int abs_qc) { + if (mag_arr[0] == abs_qc) { + mag_arr[1] -= 1; + assert(mag_arr[1] >= 0); + } +} + +static INLINE int get_mag_from_mag_arr(const int *mag_arr) { + int mag; + if (mag_arr[1] > 0) { + mag = mag_arr[0]; + } else if (mag_arr[0] > 0) { + mag = mag_arr[0] - 1; + } else { + // no neighbor + assert(mag_arr[0] == 0 && mag_arr[1] == 0); + mag = 0; + } + return mag; +} + +static int neighbor_level_down_update(int *new_count, int *new_mag, int count, + const int *mag, int coeff_idx, + tran_low_t abs_nb_coeff, int nb_coeff_idx, + int level, const TxbInfo *txb_info) { + *new_count = count; + *new_mag = get_mag_from_mag_arr(mag); + + int update = 0; + // check if br_count changes + if (abs_nb_coeff == level) { + update = 1; + *new_count -= 1; + assert(*new_count >= 0); + } + const int row = coeff_idx >> txb_info->bwl; + const int col = coeff_idx - (row << txb_info->bwl); + const int nb_row = nb_coeff_idx >> txb_info->bwl; + const int nb_col = nb_coeff_idx - (nb_row << txb_info->bwl); + + // check if mag changes + if (nb_row >= row && nb_col >= col) { + if (abs_nb_coeff == mag[0]) { + assert(mag[1] > 0); + if (mag[1] == 1) { + // the nb is the only qc with max mag + *new_mag -= 1; + assert(*new_mag >= 0); + update = 1; + } + } + } + return update; +} + +static int try_neighbor_level_down_br(int coeff_idx, int nb_coeff_idx, + const TxbCache *txb_cache, + const TxbProbs *txb_probs, + const TxbInfo *txb_info) { + const tran_low_t qc = txb_info->qcoeff[coeff_idx]; + const tran_low_t abs_qc = abs(qc); + const int level = NUM_BASE_LEVELS + 1; + if (abs_qc < level) return 0; + + const tran_low_t nb_coeff = txb_info->qcoeff[nb_coeff_idx]; + const tran_low_t abs_nb_coeff = abs(nb_coeff); + const int count = txb_cache->br_count_arr[coeff_idx]; + const int *mag = txb_cache->br_mag_arr[coeff_idx]; + int new_count; + int new_mag; + const int update = + neighbor_level_down_update(&new_count, &new_mag, count, mag, coeff_idx, + abs_nb_coeff, nb_coeff_idx, level, txb_info); + if (update) { + const int row = coeff_idx >> txb_info->bwl; + const int col = coeff_idx - (row << txb_info->bwl); + const int ctx = txb_cache->br_ctx_arr[coeff_idx][0]; + const int org_cost = get_br_cost(abs_qc, ctx, txb_probs->coeff_lps); + + const int new_ctx = get_br_ctx_from_count_mag(row, col, new_count, new_mag); + const int new_cost = get_br_cost(abs_qc, new_ctx, txb_probs->coeff_lps); + const int cost_diff = -org_cost + new_cost; + return cost_diff; + } else { + return 0; + } +} + +static int try_neighbor_level_down_base(int coeff_idx, int nb_coeff_idx, + const TxbCache *txb_cache, + const TxbProbs *txb_probs, + const TxbInfo *txb_info) { + const tran_low_t qc = txb_info->qcoeff[coeff_idx]; + const tran_low_t abs_qc = abs(qc); + + int cost_diff = 0; + for (int base_idx = 0; base_idx < NUM_BASE_LEVELS; ++base_idx) { + const int level = base_idx + 1; + if (abs_qc < level) continue; + + const tran_low_t nb_coeff = txb_info->qcoeff[nb_coeff_idx]; + const tran_low_t abs_nb_coeff = abs(nb_coeff); + + const int count = txb_cache->base_count_arr[base_idx][coeff_idx]; + const int *mag = txb_cache->base_mag_arr[coeff_idx]; + int new_count; + int new_mag; + const int update = + neighbor_level_down_update(&new_count, &new_mag, count, mag, coeff_idx, + abs_nb_coeff, nb_coeff_idx, level, txb_info); + if (update) { + const int row = coeff_idx >> txb_info->bwl; + const int col = coeff_idx - (row << txb_info->bwl); + const int ctx = 
txb_cache->base_ctx_arr[base_idx][coeff_idx][0]; + const int org_cost = + get_base_cost(abs_qc, ctx, txb_probs->coeff_base, base_idx); + + const int new_ctx = + get_base_ctx_from_count_mag(row, col, new_count, new_mag, level); + const int new_cost = + get_base_cost(abs_qc, new_ctx, txb_probs->coeff_base, base_idx); + cost_diff += -org_cost + new_cost; + } + } + return cost_diff; +} + +static int try_neighbor_level_down_nz(int coeff_idx, int nb_coeff_idx, + const TxbCache *txb_cache, + const TxbProbs *txb_probs, + TxbInfo *txb_info) { + // assume eob doesn't change + const tran_low_t qc = txb_info->qcoeff[coeff_idx]; + const tran_low_t abs_qc = abs(qc); + const tran_low_t nb_coeff = txb_info->qcoeff[nb_coeff_idx]; + const tran_low_t abs_nb_coeff = abs(nb_coeff); + if (abs_nb_coeff != 1) return 0; + const int16_t *iscan = txb_info->scan_order->iscan; + const int scan_idx = iscan[coeff_idx]; + if (scan_idx == txb_info->seg_eob) return 0; + const int nb_scan_idx = iscan[nb_coeff_idx]; + if (nb_scan_idx < scan_idx) { + const int count = txb_cache->nz_count_arr[coeff_idx]; + assert(count > 0); + txb_info->qcoeff[nb_coeff_idx] = get_lower_coeff(nb_coeff); + const int new_ctx = get_nz_map_ctx_from_count( + count - 1, txb_info->qcoeff, coeff_idx, txb_info->bwl, iscan); + txb_info->qcoeff[nb_coeff_idx] = nb_coeff; + const int ctx = txb_cache->nz_ctx_arr[coeff_idx][0]; + const int is_nz = abs_qc > 0; + const int org_cost = av1_cost_bit(txb_probs->nz_map[ctx], is_nz); + const int new_cost = av1_cost_bit(txb_probs->nz_map[new_ctx], is_nz); + const int cost_diff = new_cost - org_cost; + return cost_diff; + } else { + return 0; + } +} + +static int try_self_level_down(tran_low_t *low_coeff, int coeff_idx, + const TxbCache *txb_cache, + const TxbProbs *txb_probs, TxbInfo *txb_info) { + const tran_low_t qc = txb_info->qcoeff[coeff_idx]; + if (qc == 0) { + *low_coeff = 0; + return 0; + } + const tran_low_t abs_qc = abs(qc); + *low_coeff = get_lower_coeff(qc); + int cost_diff; + if (*low_coeff == 0) { + const int scan_idx = txb_info->scan_order->iscan[coeff_idx]; + const aom_prob level_prob = + get_level_prob(abs_qc, coeff_idx, txb_cache, txb_probs); + const aom_prob low_level_prob = + get_level_prob(abs(*low_coeff), coeff_idx, txb_cache, txb_probs); + if (scan_idx < txb_info->seg_eob) { + // When level-0, we code the binary of abs_qc > level + // but when level-k k > 0 we code the binary of abs_qc == level + // That's why wee need this special treatment for level-0 map + // TODO(angiebird): make leve-0 consistent to other levels + cost_diff = -av1_cost_bit(level_prob, 1) + + av1_cost_bit(low_level_prob, 0) - + av1_cost_bit(low_level_prob, 1); + } else { + cost_diff = -av1_cost_bit(level_prob, 1); + } + + if (scan_idx < txb_info->seg_eob) { + const int eob_ctx = + get_eob_ctx(txb_info->qcoeff, coeff_idx, txb_info->bwl); + cost_diff -= av1_cost_bit(txb_probs->eob_flag[eob_ctx], + scan_idx == (txb_info->eob - 1)); + } + + const int sign_cost = get_sign_bit_cost( + qc, coeff_idx, txb_probs->dc_sign_prob, txb_info->txb_ctx->dc_sign_ctx); + cost_diff -= sign_cost; + } else if (abs_qc < 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) { + const aom_prob level_prob = + get_level_prob(abs_qc, coeff_idx, txb_cache, txb_probs); + const aom_prob low_level_prob = + get_level_prob(abs(*low_coeff), coeff_idx, txb_cache, txb_probs); + cost_diff = -av1_cost_bit(level_prob, 1) + av1_cost_bit(low_level_prob, 1) - + av1_cost_bit(low_level_prob, 0); + } else if (abs_qc == 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) { + const aom_prob 
low_level_prob = + get_level_prob(abs(*low_coeff), coeff_idx, txb_cache, txb_probs); + cost_diff = -get_golomb_cost(abs_qc) + av1_cost_bit(low_level_prob, 1) - + av1_cost_bit(low_level_prob, 0); + } else { + assert(abs_qc > 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE); + const tran_low_t abs_low_coeff = abs(*low_coeff); + cost_diff = -get_golomb_cost(abs_qc) + get_golomb_cost(abs_low_coeff); + } + return cost_diff; +} + +#define COST_MAP_SIZE 5 +#define COST_MAP_OFFSET 2 + +static INLINE int check_nz_neighbor(tran_low_t qc) { return abs(qc) == 1; } + +static INLINE int check_base_neighbor(tran_low_t qc) { + return abs(qc) <= 1 + NUM_BASE_LEVELS; +} + +static INLINE int check_br_neighbor(tran_low_t qc) { + return abs(qc) > BR_MAG_OFFSET; +} + +// TODO(angiebird): add static to this function once it's called +int try_level_down(int coeff_idx, const TxbCache *txb_cache, + const TxbProbs *txb_probs, TxbInfo *txb_info, + int (*cost_map)[COST_MAP_SIZE]) { + if (cost_map) { + for (int i = 0; i < COST_MAP_SIZE; ++i) av1_zero(cost_map[i]); + } + + tran_low_t qc = txb_info->qcoeff[coeff_idx]; + tran_low_t low_coeff; + if (qc == 0) return 0; + int accu_cost_diff = 0; + + const int16_t *iscan = txb_info->scan_order->iscan; + const int eob = txb_info->eob; + const int scan_idx = iscan[coeff_idx]; + if (scan_idx < eob) { + const int cost_diff = try_self_level_down(&low_coeff, coeff_idx, txb_cache, + txb_probs, txb_info); + if (cost_map) + cost_map[0 + COST_MAP_OFFSET][0 + COST_MAP_OFFSET] = cost_diff; + accu_cost_diff += cost_diff; + } + + const int row = coeff_idx >> txb_info->bwl; + const int col = coeff_idx - (row << txb_info->bwl); + if (check_nz_neighbor(qc)) { + for (int i = 0; i < SIG_REF_OFFSET_NUM; ++i) { + const int nb_row = row - sig_ref_offset[i][0]; + const int nb_col = col - sig_ref_offset[i][1]; + const int nb_coeff_idx = nb_row * txb_info->stride + nb_col; + const int nb_scan_idx = iscan[nb_coeff_idx]; + if (nb_scan_idx < eob && nb_row >= 0 && nb_col >= 0 && + nb_row < txb_info->stride && nb_col < txb_info->stride) { + const int cost_diff = try_neighbor_level_down_nz( + nb_coeff_idx, coeff_idx, txb_cache, txb_probs, txb_info); + if (cost_map) + cost_map[nb_row - row + COST_MAP_OFFSET] + [nb_col - col + COST_MAP_OFFSET] += cost_diff; + accu_cost_diff += cost_diff; + } + } + } + + if (check_base_neighbor(qc)) { + for (int i = 0; i < BASE_CONTEXT_POSITION_NUM; ++i) { + const int nb_row = row - base_ref_offset[i][0]; + const int nb_col = col - base_ref_offset[i][1]; + const int nb_coeff_idx = nb_row * txb_info->stride + nb_col; + const int nb_scan_idx = iscan[nb_coeff_idx]; + if (nb_scan_idx < eob && nb_row >= 0 && nb_col >= 0 && + nb_row < txb_info->stride && nb_col < txb_info->stride) { + const int cost_diff = try_neighbor_level_down_base( + nb_coeff_idx, coeff_idx, txb_cache, txb_probs, txb_info); + if (cost_map) + cost_map[nb_row - row + COST_MAP_OFFSET] + [nb_col - col + COST_MAP_OFFSET] += cost_diff; + accu_cost_diff += cost_diff; + } + } + } + + if (check_br_neighbor(qc)) { + for (int i = 0; i < BR_CONTEXT_POSITION_NUM; ++i) { + const int nb_row = row - br_ref_offset[i][0]; + const int nb_col = col - br_ref_offset[i][1]; + const int nb_coeff_idx = nb_row * txb_info->stride + nb_col; + const int nb_scan_idx = iscan[nb_coeff_idx]; + if (nb_scan_idx < eob && nb_row >= 0 && nb_col >= 0 && + nb_row < txb_info->stride && nb_col < txb_info->stride) { + const int cost_diff = try_neighbor_level_down_br( + nb_coeff_idx, coeff_idx, txb_cache, txb_probs, txb_info); + if (cost_map) + cost_map[nb_row - 
row + COST_MAP_OFFSET] + [nb_col - col + COST_MAP_OFFSET] += cost_diff; + accu_cost_diff += cost_diff; + } + } + } + + return accu_cost_diff; +} + +static int get_low_coeff_cost(int coeff_idx, const TxbCache *txb_cache, + const TxbProbs *txb_probs, + const TxbInfo *txb_info) { + const tran_low_t qc = txb_info->qcoeff[coeff_idx]; + const int abs_qc = abs(qc); + assert(abs_qc <= 1); + int cost = 0; + const int scan_idx = txb_info->scan_order->iscan[coeff_idx]; + if (scan_idx < txb_info->seg_eob) { + const aom_prob level_prob = + get_level_prob(0, coeff_idx, txb_cache, txb_probs); + cost += av1_cost_bit(level_prob, qc != 0); + } + + if (qc != 0) { + const int base_idx = 0; + const int ctx = txb_cache->base_ctx_arr[base_idx][coeff_idx][0]; + cost += get_base_cost(abs_qc, ctx, txb_probs->coeff_base, base_idx); + if (scan_idx < txb_info->seg_eob) { + const int eob_ctx = + get_eob_ctx(txb_info->qcoeff, coeff_idx, txb_info->bwl); + cost += av1_cost_bit(txb_probs->eob_flag[eob_ctx], + scan_idx == (txb_info->eob - 1)); + } + cost += get_sign_bit_cost(qc, coeff_idx, txb_probs->dc_sign_prob, + txb_info->txb_ctx->dc_sign_ctx); + } + return cost; +} + +static INLINE void set_eob(TxbInfo *txb_info, int eob) { + txb_info->eob = eob; + txb_info->seg_eob = AOMMIN(eob, tx_size_2d[txb_info->tx_size] - 1); +} + +// TODO(angiebird): add static to this function once it's called +int try_change_eob(int *new_eob, int coeff_idx, const TxbCache *txb_cache, + const TxbProbs *txb_probs, TxbInfo *txb_info) { + assert(txb_info->eob > 0); + const tran_low_t qc = txb_info->qcoeff[coeff_idx]; + const int abs_qc = abs(qc); + if (abs_qc != 1) { + *new_eob = -1; + return 0; + } + const int16_t *iscan = txb_info->scan_order->iscan; + const int16_t *scan = txb_info->scan_order->scan; + const int scan_idx = iscan[coeff_idx]; + *new_eob = 0; + int cost_diff = 0; + cost_diff -= get_low_coeff_cost(coeff_idx, txb_cache, txb_probs, txb_info); + // int coeff_cost = + // get_coeff_cost(qc, scan_idx, txb_info, txb_probs); + // if (-cost_diff != coeff_cost) { + // printf("-cost_diff %d coeff_cost %d\n", -cost_diff, coeff_cost); + // get_low_coeff_cost(coeff_idx, txb_cache, txb_probs, txb_info); + // get_coeff_cost(qc, scan_idx, txb_info, txb_probs); + // } + for (int si = scan_idx - 1; si >= 0; --si) { + const int ci = scan[si]; + if (txb_info->qcoeff[ci] != 0) { + *new_eob = si + 1; + break; + } else { + cost_diff -= get_low_coeff_cost(ci, txb_cache, txb_probs, txb_info); + } + } + + const int org_eob = txb_info->eob; + set_eob(txb_info, *new_eob); + cost_diff += try_level_down(coeff_idx, txb_cache, txb_probs, txb_info, NULL); + set_eob(txb_info, org_eob); + + if (*new_eob > 0) { + // Note that get_eob_ctx does NOT actually account for qcoeff, so we don't + // need to lower down the qcoeff here + const int eob_ctx = + get_eob_ctx(txb_info->qcoeff, scan[*new_eob - 1], txb_info->bwl); + cost_diff -= av1_cost_bit(txb_probs->eob_flag[eob_ctx], 0); + cost_diff += av1_cost_bit(txb_probs->eob_flag[eob_ctx], 1); + } else { + const int txb_skip_ctx = txb_info->txb_ctx->txb_skip_ctx; + cost_diff -= av1_cost_bit(txb_probs->txb_skip[txb_skip_ctx], 0); + cost_diff += av1_cost_bit(txb_probs->txb_skip[txb_skip_ctx], 1); + } + return cost_diff; +} + +static INLINE tran_low_t qcoeff_to_dqcoeff(tran_low_t qc, int dqv, int shift) { + int sgn = qc < 0 ? 
-1 : 1; + return sgn * ((abs(qc) * dqv) >> shift); +} + +// TODO(angiebird): add static to this function it's called +void update_level_down(int coeff_idx, TxbCache *txb_cache, TxbInfo *txb_info) { + const tran_low_t qc = txb_info->qcoeff[coeff_idx]; + const int abs_qc = abs(qc); + if (qc == 0) return; + const tran_low_t low_coeff = get_lower_coeff(qc); + txb_info->qcoeff[coeff_idx] = low_coeff; + const int dqv = txb_info->dequant[coeff_idx != 0]; + txb_info->dqcoeff[coeff_idx] = + qcoeff_to_dqcoeff(low_coeff, dqv, txb_info->shift); + + const int row = coeff_idx >> txb_info->bwl; + const int col = coeff_idx - (row << txb_info->bwl); + const int eob = txb_info->eob; + const int16_t *iscan = txb_info->scan_order->iscan; + for (int i = 0; i < SIG_REF_OFFSET_NUM; ++i) { + const int nb_row = row - sig_ref_offset[i][0]; + const int nb_col = col - sig_ref_offset[i][1]; + const int nb_coeff_idx = nb_row * txb_info->stride + nb_col; + const int nb_scan_idx = iscan[nb_coeff_idx]; + if (nb_scan_idx < eob && nb_row >= 0 && nb_col >= 0 && + nb_row < txb_info->stride && nb_col < txb_info->stride) { + const int scan_idx = iscan[coeff_idx]; + if (scan_idx < nb_scan_idx) { + const int level = 1; + if (abs_qc == level) { + txb_cache->nz_count_arr[nb_coeff_idx] -= 1; + assert(txb_cache->nz_count_arr[nb_coeff_idx] >= 0); + } + const int count = txb_cache->nz_count_arr[nb_coeff_idx]; + txb_cache->nz_ctx_arr[nb_coeff_idx][0] = get_nz_map_ctx_from_count( + count, txb_info->qcoeff, nb_coeff_idx, txb_info->bwl, iscan); + // int ref_ctx = get_nz_map_ctx2(txb_info->qcoeff, nb_coeff_idx, + // txb_info->bwl, iscan); + // if (ref_ctx != txb_cache->nz_ctx_arr[nb_coeff_idx][0]) + // printf("nz ctx %d ref_ctx %d\n", + // txb_cache->nz_ctx_arr[nb_coeff_idx][0], ref_ctx); + } + } + } + + for (int i = 0; i < BASE_CONTEXT_POSITION_NUM; ++i) { + const int nb_row = row - base_ref_offset[i][0]; + const int nb_col = col - base_ref_offset[i][1]; + const int nb_coeff_idx = nb_row * txb_info->stride + nb_col; + const tran_low_t nb_coeff = txb_info->qcoeff[nb_coeff_idx]; + if (!has_base(nb_coeff, 0)) continue; + const int nb_scan_idx = iscan[nb_coeff_idx]; + if (nb_scan_idx < eob && nb_row >= 0 && nb_col >= 0 && + nb_row < txb_info->stride && nb_col < txb_info->stride) { + if (row >= nb_row && col >= nb_col) + update_mag_arr(txb_cache->base_mag_arr[nb_coeff_idx], abs_qc); + const int mag = + get_mag_from_mag_arr(txb_cache->base_mag_arr[nb_coeff_idx]); + for (int base_idx = 0; base_idx < NUM_BASE_LEVELS; ++base_idx) { + if (!has_base(nb_coeff, base_idx)) continue; + const int level = base_idx + 1; + if (abs_qc == level) { + txb_cache->base_count_arr[base_idx][nb_coeff_idx] -= 1; + assert(txb_cache->base_count_arr[base_idx][nb_coeff_idx] >= 0); + } + const int count = txb_cache->base_count_arr[base_idx][nb_coeff_idx]; + txb_cache->base_ctx_arr[base_idx][nb_coeff_idx][0] = + get_base_ctx_from_count_mag(nb_row, nb_col, count, mag, level); + // int ref_ctx = get_base_ctx(txb_info->qcoeff, nb_coeff_idx, + // txb_info->bwl, level); + // if (ref_ctx != txb_cache->base_ctx_arr[base_idx][nb_coeff_idx][0]) { + // printf("base ctx %d ref_ctx %d\n", + // txb_cache->base_ctx_arr[base_idx][nb_coeff_idx][0], ref_ctx); + // } + } + } + } + + for (int i = 0; i < BR_CONTEXT_POSITION_NUM; ++i) { + const int nb_row = row - br_ref_offset[i][0]; + const int nb_col = col - br_ref_offset[i][1]; + const int nb_coeff_idx = nb_row * txb_info->stride + nb_col; + const int nb_scan_idx = iscan[nb_coeff_idx]; + const tran_low_t nb_coeff = 
txb_info->qcoeff[nb_coeff_idx]; + if (!has_br(nb_coeff)) continue; + if (nb_scan_idx < eob && nb_row >= 0 && nb_col >= 0 && + nb_row < txb_info->stride && nb_col < txb_info->stride) { + const int level = 1 + NUM_BASE_LEVELS; + if (abs_qc == level) { + txb_cache->br_count_arr[nb_coeff_idx] -= 1; + assert(txb_cache->br_count_arr[nb_coeff_idx] >= 0); + } + if (row >= nb_row && col >= nb_col) + update_mag_arr(txb_cache->br_mag_arr[nb_coeff_idx], abs_qc); + const int count = txb_cache->br_count_arr[nb_coeff_idx]; + const int mag = get_mag_from_mag_arr(txb_cache->br_mag_arr[nb_coeff_idx]); + txb_cache->br_ctx_arr[nb_coeff_idx][0] = + get_br_ctx_from_count_mag(nb_row, nb_col, count, mag); + // int ref_ctx = get_level_ctx(txb_info->qcoeff, nb_coeff_idx, + // txb_info->bwl); + // if (ref_ctx != txb_cache->br_ctx_arr[nb_coeff_idx][0]) { + // printf("base ctx %d ref_ctx %d\n", + // txb_cache->br_ctx_arr[nb_coeff_idx][0], ref_ctx); + // } + } + } +} + +static int get_coeff_cost(tran_low_t qc, int scan_idx, TxbInfo *txb_info, + const TxbProbs *txb_probs) { + const TXB_CTX *txb_ctx = txb_info->txb_ctx; + const int is_nz = (qc != 0); + const tran_low_t abs_qc = abs(qc); + int cost = 0; + const int16_t *scan = txb_info->scan_order->scan; + const int16_t *iscan = txb_info->scan_order->iscan; + + if (scan_idx < txb_info->seg_eob) { + int coeff_ctx = + get_nz_map_ctx2(txb_info->qcoeff, scan[scan_idx], txb_info->bwl, iscan); + cost += av1_cost_bit(txb_probs->nz_map[coeff_ctx], is_nz); + } + + if (is_nz) { + cost += get_sign_bit_cost(qc, scan_idx, txb_probs->dc_sign_prob, + txb_ctx->dc_sign_ctx); + + int ctx_ls[NUM_BASE_LEVELS] = { 0 }; + get_base_ctx_set(txb_info->qcoeff, scan[scan_idx], txb_info->bwl, ctx_ls); + + int i; + for (i = 0; i < NUM_BASE_LEVELS; ++i) { + cost += get_base_cost(abs_qc, ctx_ls[i], txb_probs->coeff_base, i); + } + + if (abs_qc > NUM_BASE_LEVELS) { + int ctx = get_br_ctx(txb_info->qcoeff, scan[scan_idx], txb_info->bwl); + cost += get_br_cost(abs_qc, ctx, txb_probs->coeff_lps); + cost += get_golomb_cost(abs_qc); + } + + if (scan_idx < txb_info->seg_eob) { + int eob_ctx = + get_eob_ctx(txb_info->qcoeff, scan[scan_idx], txb_info->bwl); + cost += av1_cost_bit(txb_probs->eob_flag[eob_ctx], + scan_idx == (txb_info->eob - 1)); + } + } + return cost; +} + +#if TEST_OPTIMIZE_TXB +#define ALL_REF_OFFSET_NUM 17 +static int all_ref_offset[ALL_REF_OFFSET_NUM][2] = { + { 0, 0 }, { -2, -1 }, { -2, 0 }, { -2, 1 }, { -1, -2 }, { -1, -1 }, + { -1, 0 }, { -1, 1 }, { 0, -2 }, { 0, -1 }, { 1, -2 }, { 1, -1 }, + { 1, 0 }, { 2, 0 }, { 0, 1 }, { 0, 2 }, { 1, 1 }, +}; + +static int try_level_down_ref(int coeff_idx, const TxbProbs *txb_probs, + TxbInfo *txb_info, + int (*cost_map)[COST_MAP_SIZE]) { + if (cost_map) { + for (int i = 0; i < COST_MAP_SIZE; ++i) av1_zero(cost_map[i]); + } + tran_low_t qc = txb_info->qcoeff[coeff_idx]; + if (qc == 0) return 0; + int row = coeff_idx >> txb_info->bwl; + int col = coeff_idx - (row << txb_info->bwl); + int org_cost = 0; + for (int i = 0; i < ALL_REF_OFFSET_NUM; ++i) { + int nb_row = row - all_ref_offset[i][0]; + int nb_col = col - all_ref_offset[i][1]; + int nb_coeff_idx = nb_row * txb_info->stride + nb_col; + int nb_scan_idx = txb_info->scan_order->iscan[nb_coeff_idx]; + if (nb_scan_idx < txb_info->eob && nb_row >= 0 && nb_col >= 0 && + nb_row < txb_info->stride && nb_col < txb_info->stride) { + tran_low_t nb_coeff = txb_info->qcoeff[nb_coeff_idx]; + int cost = get_coeff_cost(nb_coeff, nb_scan_idx, txb_info, txb_probs); + if (cost_map) + cost_map[nb_row - row + 
COST_MAP_OFFSET] + [nb_col - col + COST_MAP_OFFSET] -= cost; + org_cost += cost; + } + } + txb_info->qcoeff[coeff_idx] = get_lower_coeff(qc); + int new_cost = 0; + for (int i = 0; i < ALL_REF_OFFSET_NUM; ++i) { + int nb_row = row - all_ref_offset[i][0]; + int nb_col = col - all_ref_offset[i][1]; + int nb_coeff_idx = nb_row * txb_info->stride + nb_col; + int nb_scan_idx = txb_info->scan_order->iscan[nb_coeff_idx]; + if (nb_scan_idx < txb_info->eob && nb_row >= 0 && nb_col >= 0 && + nb_row < txb_info->stride && nb_col < txb_info->stride) { + tran_low_t nb_coeff = txb_info->qcoeff[nb_coeff_idx]; + int cost = get_coeff_cost(nb_coeff, nb_scan_idx, txb_info, txb_probs); + if (cost_map) + cost_map[nb_row - row + COST_MAP_OFFSET] + [nb_col - col + COST_MAP_OFFSET] += cost; + new_cost += cost; + } + } + txb_info->qcoeff[coeff_idx] = qc; + return new_cost - org_cost; +} +static void test_level_down(int coeff_idx, const TxbCache *txb_cache, + const TxbProbs *txb_probs, TxbInfo *txb_info) { + int cost_map[COST_MAP_SIZE][COST_MAP_SIZE]; + int ref_cost_map[COST_MAP_SIZE][COST_MAP_SIZE]; + const int cost_diff = + try_level_down(coeff_idx, txb_cache, txb_probs, txb_info, cost_map); + const int cost_diff_ref = + try_level_down_ref(coeff_idx, txb_probs, txb_info, ref_cost_map); + if (cost_diff != cost_diff_ref) { + printf("qc %d cost_diff %d cost_diff_ref %d\n", txb_info->qcoeff[coeff_idx], + cost_diff, cost_diff_ref); + for (int r = 0; r < COST_MAP_SIZE; ++r) { + for (int c = 0; c < COST_MAP_SIZE; ++c) { + printf("%d:%d ", cost_map[r][c], ref_cost_map[r][c]); + } + printf("\n"); + } + } +} +#endif + +// TODO(angiebird): make this static once it's called +int get_txb_cost(TxbInfo *txb_info, const TxbProbs *txb_probs) { + int cost = 0; + int txb_skip_ctx = txb_info->txb_ctx->txb_skip_ctx; + const int16_t *scan = txb_info->scan_order->scan; + if (txb_info->eob == 0) { + cost = av1_cost_bit(txb_probs->txb_skip[txb_skip_ctx], 1); + return cost; + } + cost = av1_cost_bit(txb_probs->txb_skip[txb_skip_ctx], 0); + for (int c = 0; c < txb_info->eob; ++c) { + tran_low_t qc = txb_info->qcoeff[scan[c]]; + int coeff_cost = get_coeff_cost(qc, c, txb_info, txb_probs); + cost += coeff_cost; + } + return cost; +} + +#if TEST_OPTIMIZE_TXB +void test_try_change_eob(TxbInfo *txb_info, TxbProbs *txb_probs, + TxbCache *txb_cache) { + int eob = txb_info->eob; + const int16_t *scan = txb_info->scan_order->scan; + if (eob > 0) { + int last_si = eob - 1; + int last_ci = scan[last_si]; + int last_coeff = txb_info->qcoeff[last_ci]; + if (abs(last_coeff) == 1) { + int new_eob; + int cost_diff = + try_change_eob(&new_eob, last_ci, txb_cache, txb_probs, txb_info); + int org_eob = txb_info->eob; + int cost = get_txb_cost(txb_info, txb_probs); + + txb_info->qcoeff[last_ci] = get_lower_coeff(last_coeff); + set_eob(txb_info, new_eob); + int new_cost = get_txb_cost(txb_info, txb_probs); + set_eob(txb_info, org_eob); + txb_info->qcoeff[last_ci] = last_coeff; + + int ref_cost_diff = -cost + new_cost; + if (cost_diff != ref_cost_diff) + printf("org_eob %d new_eob %d cost_diff %d ref_cost_diff %d\n", org_eob, + new_eob, cost_diff, ref_cost_diff); + } + } +} +#endif + +static INLINE int64_t get_coeff_dist(tran_low_t tcoeff, tran_low_t dqcoeff, + int shift) { + const int64_t diff = (tcoeff - dqcoeff) * (1 << shift); + const int64_t error = diff * diff; + return error; +} + +typedef struct LevelDownStats { + int update; + tran_low_t low_qc; + tran_low_t low_dqc; + int64_t rd_diff; + int cost_diff; + int64_t dist_diff; + int new_eob; +} 
LevelDownStats; + +void try_level_down_facade(LevelDownStats *stats, int scan_idx, + const TxbCache *txb_cache, const TxbProbs *txb_probs, + TxbInfo *txb_info) { + const int16_t *scan = txb_info->scan_order->scan; + const int coeff_idx = scan[scan_idx]; + const tran_low_t qc = txb_info->qcoeff[coeff_idx]; + stats->new_eob = -1; + stats->update = 0; + if (qc == 0) { + return; + } + + const tran_low_t tqc = txb_info->tcoeff[coeff_idx]; + const int dqv = txb_info->dequant[coeff_idx != 0]; + + const tran_low_t dqc = qcoeff_to_dqcoeff(qc, dqv, txb_info->shift); + const int64_t dqc_dist = get_coeff_dist(tqc, dqc, txb_info->shift); + + stats->low_qc = get_lower_coeff(qc); + stats->low_dqc = qcoeff_to_dqcoeff(stats->low_qc, dqv, txb_info->shift); + const int64_t low_dqc_dist = + get_coeff_dist(tqc, stats->low_dqc, txb_info->shift); + + stats->dist_diff = -dqc_dist + low_dqc_dist; + stats->cost_diff = 0; + stats->new_eob = txb_info->eob; + if (scan_idx == txb_info->eob - 1 && abs(qc) == 1) { + stats->cost_diff = try_change_eob(&stats->new_eob, coeff_idx, txb_cache, + txb_probs, txb_info); + } else { + stats->cost_diff = + try_level_down(coeff_idx, txb_cache, txb_probs, txb_info, NULL); +#if TEST_OPTIMIZE_TXB + test_level_down(coeff_idx, txb_cache, txb_probs, txb_info); +#endif + } + stats->rd_diff = RDCOST(txb_info->rdmult, txb_info->rddiv, stats->cost_diff, + stats->dist_diff); + if (stats->rd_diff < 0) stats->update = 1; + return; +} + +static int optimize_txb(TxbInfo *txb_info, const TxbProbs *txb_probs, + TxbCache *txb_cache, int dry_run) { + int update = 0; + if (txb_info->eob == 0) return update; + int cost_diff = 0; + int64_t dist_diff = 0; + int64_t rd_diff = 0; + const int max_eob = tx_size_2d[txb_info->tx_size]; + +#if TEST_OPTIMIZE_TXB + int64_t sse; + int64_t org_dist = + av1_block_error_c(txb_info->tcoeff, txb_info->dqcoeff, max_eob, &sse) * + (1 << (2 * txb_info->shift)); + int org_cost = get_txb_cost(txb_info, txb_probs); +#endif + + tran_low_t *org_qcoeff = txb_info->qcoeff; + tran_low_t *org_dqcoeff = txb_info->dqcoeff; + + tran_low_t tmp_qcoeff[MAX_TX_SQUARE]; + tran_low_t tmp_dqcoeff[MAX_TX_SQUARE]; + const int org_eob = txb_info->eob; + if (dry_run) { + memcpy(tmp_qcoeff, org_qcoeff, sizeof(org_qcoeff[0]) * max_eob); + memcpy(tmp_dqcoeff, org_dqcoeff, sizeof(org_dqcoeff[0]) * max_eob); + txb_info->qcoeff = tmp_qcoeff; + txb_info->dqcoeff = tmp_dqcoeff; + } + + const int16_t *scan = txb_info->scan_order->scan; + + // forward optimize the nz_map + const int cur_eob = txb_info->eob; + for (int si = 0; si < cur_eob; ++si) { + const int coeff_idx = scan[si]; + tran_low_t qc = txb_info->qcoeff[coeff_idx]; + if (abs(qc) == 1) { + LevelDownStats stats; + try_level_down_facade(&stats, si, txb_cache, txb_probs, txb_info); + if (stats.update) { + update = 1; + cost_diff += stats.cost_diff; + dist_diff += stats.dist_diff; + rd_diff += stats.rd_diff; + update_level_down(coeff_idx, txb_cache, txb_info); + set_eob(txb_info, stats.new_eob); + } + } + } + + // backward optimize the level-k map + for (int si = txb_info->eob - 1; si >= 0; --si) { + LevelDownStats stats; + try_level_down_facade(&stats, si, txb_cache, txb_probs, txb_info); + const int coeff_idx = scan[si]; + if (stats.update) { +#if TEST_OPTIMIZE_TXB +// printf("si %d low_qc %d cost_diff %d dist_diff %ld rd_diff %ld eob %d new_eob +// %d\n", si, stats.low_qc, stats.cost_diff, stats.dist_diff, stats.rd_diff, +// txb_info->eob, stats.new_eob); +#endif + update = 1; + cost_diff += stats.cost_diff; + dist_diff += stats.dist_diff; + 
rd_diff += stats.rd_diff; + update_level_down(coeff_idx, txb_cache, txb_info); + set_eob(txb_info, stats.new_eob); + } + if (si > txb_info->eob) si = txb_info->eob; + } +#if TEST_OPTIMIZE_TXB + int64_t new_dist = + av1_block_error_c(txb_info->tcoeff, txb_info->dqcoeff, max_eob, &sse) * + (1 << (2 * txb_info->shift)); + int new_cost = get_txb_cost(txb_info, txb_probs); + int64_t ref_dist_diff = new_dist - org_dist; + int ref_cost_diff = new_cost - org_cost; + if (cost_diff != ref_cost_diff || dist_diff != ref_dist_diff) + printf( + "overall rd_diff %ld\ncost_diff %d ref_cost_diff%d\ndist_diff %ld " + "ref_dist_diff %ld\neob %d new_eob %d\n\n", + rd_diff, cost_diff, ref_cost_diff, dist_diff, ref_dist_diff, org_eob, + txb_info->eob); +#endif + if (dry_run) { + txb_info->qcoeff = org_qcoeff; + txb_info->dqcoeff = org_dqcoeff; + set_eob(txb_info, org_eob); + } + return update; +} + +// These numbers are empirically obtained. +static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = { +#if CONFIG_EC_ADAPT + { 17, 13 }, { 16, 10 }, +#else + { 20, 12 }, { 16, 12 }, +#endif +}; + +int av1_optimize_txb(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block, + TX_SIZE tx_size, TXB_CTX *txb_ctx) { + MACROBLOCKD *const xd = &x->e_mbd; + const PLANE_TYPE plane_type = get_plane_type(plane); + const TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); + const MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + const struct macroblock_plane *p = &x->plane[plane]; + struct macroblockd_plane *pd = &xd->plane[plane]; + const int eob = p->eobs[block]; + tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); + tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); + const tran_low_t *tcoeff = BLOCK_OFFSET(p->coeff, block); + const int16_t *dequant = pd->dequant; + const int seg_eob = AOMMIN(eob, tx_size_2d[tx_size] - 1); + const aom_prob *nz_map = xd->fc->nz_map[tx_size][plane_type]; + + const int bwl = b_width_log2_lookup[txsize_to_bsize[tx_size]] + 2; + const int stride = 1 << bwl; + aom_prob(*coeff_base)[COEFF_BASE_CONTEXTS] = + xd->fc->coeff_base[tx_size][plane_type]; + + const aom_prob *coeff_lps = xd->fc->coeff_lps[tx_size][plane_type]; + + const int is_inter = is_inter_block(mbmi); + const SCAN_ORDER *const scan_order = + get_scan(cm, tx_size, tx_type, is_inter_block(mbmi)); + + const TxbProbs txb_probs = { xd->fc->dc_sign[plane_type], + nz_map, + coeff_base, + coeff_lps, + xd->fc->eob_flag[tx_size][plane_type], + xd->fc->txb_skip[tx_size] }; + + const int shift = av1_get_tx_scale(tx_size); + const int64_t rdmult = + (x->rdmult * plane_rd_mult[is_inter][plane_type] + 2) >> 2; + const int64_t rddiv = x->rddiv; + + TxbInfo txb_info = { qcoeff, dqcoeff, tcoeff, dequant, shift, + tx_size, bwl, stride, eob, seg_eob, + scan_order, txb_ctx, rdmult, rddiv }; + TxbCache txb_cache; + gen_txb_cache(&txb_cache, &txb_info); + + const int update = optimize_txb(&txb_info, &txb_probs, &txb_cache, 0); + if (update) p->eobs[block] = txb_info.eob; + return txb_info.eob; +} int av1_get_txb_entropy_context(const tran_low_t *qcoeff, const SCAN_ORDER *scan_order, int eob) { const int16_t *scan = scan_order->scan; @@ -394,10 +1495,10 @@ int av1_get_txb_entropy_context(const tran_low_t *qcoeff, return cul_level; } -static void update_txb_context(int plane, int block, int blk_row, int blk_col, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size, - void *arg) { - TxbParams *const args = arg; +void av1_update_txb_context_b(int plane, int block, int blk_row, int blk_col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, + void *arg) { + struct 
tokenize_b_args *const args = arg; const AV1_COMP *cpi = args->cpi; const AV1_COMMON *cm = &cpi->common; ThreadData *const td = args->td; @@ -418,10 +1519,10 @@ static void update_txb_context(int plane, int block, int blk_row, int blk_col, av1_set_contexts(xd, pd, plane, tx_size, cul_level, blk_col, blk_row); } -static void update_and_record_txb_context(int plane, int block, int blk_row, - int blk_col, BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, void *arg) { - TxbParams *const args = arg; +void av1_update_and_record_txb_context(int plane, int block, int blk_row, + int blk_col, BLOCK_SIZE plane_bsize, + TX_SIZE tx_size, void *arg) { + struct tokenize_b_args *const args = arg; const AV1_COMP *cpi = args->cpi; const AV1_COMMON *cm = &cpi->common; ThreadData *const td = args->td; @@ -529,7 +1630,7 @@ static void update_and_record_txb_context(int plane, int block, int blk_row, } // level is above 1. - ctx = get_level_ctx(tcoeff, scan[c], bwl); + ctx = get_br_ctx(tcoeff, scan[c], bwl); for (idx = 0; idx < COEFF_BASE_RANGE; ++idx) { if (level == (idx + 1 + NUM_BASE_LEVELS)) { ++td->counts->coeff_lps[tx_size][plane_type][ctx][1]; @@ -568,23 +1669,23 @@ void av1_update_txb_context(const AV1_COMP *cpi, ThreadData *td, const int ctx = av1_get_skip_context(xd); const int skip_inc = !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP); - struct TxbParams arg = { cpi, td, 0 }; + struct tokenize_b_args arg = { cpi, td, NULL, 0 }; (void)rate; (void)mi_row; (void)mi_col; if (mbmi->skip) { if (!dry_run) td->counts->skip[ctx][1] += skip_inc; - reset_skip_context(xd, bsize); + av1_reset_skip_context(xd, mi_row, mi_col, bsize); return; } if (!dry_run) { td->counts->skip[ctx][0] += skip_inc; av1_foreach_transformed_block(xd, bsize, mi_row, mi_col, - update_and_record_txb_context, &arg); + av1_update_and_record_txb_context, &arg); } else if (dry_run == DRY_RUN_NORMAL) { - av1_foreach_transformed_block(xd, bsize, mi_row, mi_col, update_txb_context, - &arg); + av1_foreach_transformed_block(xd, bsize, mi_row, mi_col, + av1_update_txb_context_b, &arg); } else { printf("DRY_RUN_COSTCOEFFS is not supported yet\n"); assert(0); @@ -749,8 +1850,7 @@ int64_t av1_search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane, av1_invalid_rd_stats(&this_rd_stats); av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, coeff_ctx, AV1_XFORM_QUANT_FP); - if (x->plane[plane].eobs[block] && !xd->lossless[mbmi->segment_id]) - av1_optimize_b(cm, x, plane, block, tx_size, coeff_ctx); + av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l); av1_dist_block(cpi, x, plane, plane_bsize, block, blk_row, blk_col, tx_size, &this_rd_stats.dist, &this_rd_stats.sse, OUTPUT_HAS_PREDICTED_PIXELS); @@ -771,8 +1871,7 @@ int64_t av1_search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane, // copy the best result in the above tx_type search for loop av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, coeff_ctx, AV1_XFORM_QUANT_FP); - if (x->plane[plane].eobs[block] && !xd->lossless[mbmi->segment_id]) - av1_optimize_b(cm, x, plane, block, tx_size, coeff_ctx); + av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l); if (!is_inter_block(mbmi)) { // intra mode needs decoded result such that the next transform block // can use it for prediction. 
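
The core decision in the new optimize_txb()/try_level_down_facade() path above is the sign test on stats->rd_diff = RDCOST(txb_info->rdmult, txb_info->rddiv, cost_diff, dist_diff): a candidate change (lowering a coefficient's level by one, or shortening the eob when the last coefficient has magnitude 1) is kept only if it makes the combined rate-distortion cost smaller. A minimal sketch of that accept rule follows, assuming the usual rate-times-rdmult / distortion-times-2^rddiv scaling for RDCOST; rd_cost_delta() and accept_level_down() are illustrative helper names, not part of this patch.

#include <stdint.h>

/* Assumed RDCOST scaling (illustrative, not the libaom macro): the rate
 * delta is weighted by rdmult and rounded down by 8 bits, the distortion
 * delta is scaled by 2^rddiv so both terms live on the same scale. */
static int64_t rd_cost_delta(int64_t rdmult, int64_t rddiv, int rate_diff,
                             int64_t dist_diff) {
  return ((128 + (int64_t)rate_diff * rdmult) >> 8) +
         dist_diff * ((int64_t)1 << rddiv);
}

/* Mirrors "if (stats->rd_diff < 0) stats->update = 1;" in
 * try_level_down_facade(): the lowered coefficient (or the shortened eob)
 * is kept only when the combined rate-distortion delta is negative. */
static int accept_level_down(int64_t rdmult, int64_t rddiv, int rate_diff,
                             int64_t dist_diff) {
  return rd_cost_delta(rdmult, rddiv, rate_diff, dist_diff) < 0;
}

Both the forward pass over the nz_map and the backward pass over the level-k map in optimize_txb() apply this same per-coefficient test, accumulating cost_diff/dist_diff and calling update_level_down()/set_eob() whenever a change is accepted.
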
diff --git a/third_party/aom/av1/encoder/encodetxb.h b/third_party/aom/av1/encoder/encodetxb.h index 552d47b54..836033a54 100644 --- a/third_party/aom/av1/encoder/encodetxb.h +++ b/third_party/aom/av1/encoder/encodetxb.h @@ -22,6 +22,47 @@ #ifdef __cplusplus extern "C" { #endif + +typedef struct TxbInfo { + tran_low_t *qcoeff; + tran_low_t *dqcoeff; + const tran_low_t *tcoeff; + const int16_t *dequant; + int shift; + TX_SIZE tx_size; + int bwl; + int stride; + int eob; + int seg_eob; + const SCAN_ORDER *scan_order; + TXB_CTX *txb_ctx; + int64_t rdmult; + int64_t rddiv; +} TxbInfo; + +typedef struct TxbCache { + int nz_count_arr[MAX_TX_SQUARE]; + int nz_ctx_arr[MAX_TX_SQUARE][2]; + int base_count_arr[NUM_BASE_LEVELS][MAX_TX_SQUARE]; + int base_mag_arr[MAX_TX_SQUARE] + [2]; // [0]: max magnitude [1]: num of max magnitude + int base_ctx_arr[NUM_BASE_LEVELS][MAX_TX_SQUARE][2]; // [1]: not used + + int br_count_arr[MAX_TX_SQUARE]; + int br_mag_arr[MAX_TX_SQUARE] + [2]; // [0]: max magnitude [1]: num of max magnitude + int br_ctx_arr[MAX_TX_SQUARE][2]; // [1]: not used +} TxbCache; + +typedef struct TxbProbs { + const aom_prob *dc_sign_prob; + const aom_prob *nz_map; + aom_prob (*coeff_base)[COEFF_BASE_CONTEXTS]; + const aom_prob *coeff_lps; + const aom_prob *eob_flag; + const aom_prob *txb_skip; +} TxbProbs; + void av1_alloc_txb_buf(AV1_COMP *cpi); void av1_free_txb_buf(AV1_COMP *cpi); int av1_cost_coeffs_txb(const AV1_COMP *const cpi, MACROBLOCK *x, int plane, @@ -39,6 +80,14 @@ void av1_update_txb_context(const AV1_COMP *cpi, ThreadData *td, const int mi_row, const int mi_col); void av1_write_txb_probs(AV1_COMP *cpi, aom_writer *w); +void av1_update_txb_context_b(int plane, int block, int blk_row, int blk_col, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, + void *arg); + +void av1_update_and_record_txb_context(int plane, int block, int blk_row, + int blk_col, BLOCK_SIZE plane_bsize, + TX_SIZE tx_size, void *arg); + #if CONFIG_TXK_SEL int64_t av1_search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane, int block, int blk_row, int blk_col, @@ -46,6 +95,8 @@ int64_t av1_search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane, const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l, int use_fast_coef_costing, RD_STATS *rd_stats); #endif +int av1_optimize_txb(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block, + TX_SIZE tx_size, TXB_CTX *txb_ctx); #ifdef __cplusplus } #endif diff --git a/third_party/aom/av1/encoder/ethread.c b/third_party/aom/av1/encoder/ethread.c index 34f0b9566..7af5f78b6 100644 --- a/third_party/aom/av1/encoder/ethread.c +++ b/third_party/aom/av1/encoder/ethread.c @@ -93,14 +93,42 @@ void av1_encode_tiles_mt(AV1_COMP *cpi) { thread_data->td->pc_tree = NULL; av1_setup_pc_tree(cm, thread_data->td); - // Set up variance tree if needed. 
- if (cpi->sf.partition_search_type == VAR_BASED_PARTITION) - av1_setup_var_tree(cm, thread_data->td); - +#if CONFIG_MOTION_VAR +#if CONFIG_HIGHBITDEPTH + int buf_scaler = 2; +#else + int buf_scaler = 1; +#endif + CHECK_MEM_ERROR(cm, thread_data->td->above_pred_buf, + (uint8_t *)aom_memalign( + 16, buf_scaler * MAX_MB_PLANE * MAX_SB_SQUARE * + sizeof(*thread_data->td->above_pred_buf))); + CHECK_MEM_ERROR(cm, thread_data->td->left_pred_buf, + (uint8_t *)aom_memalign( + 16, buf_scaler * MAX_MB_PLANE * MAX_SB_SQUARE * + sizeof(*thread_data->td->left_pred_buf))); + CHECK_MEM_ERROR( + cm, thread_data->td->wsrc_buf, + (int32_t *)aom_memalign( + 16, MAX_SB_SQUARE * sizeof(*thread_data->td->wsrc_buf))); + CHECK_MEM_ERROR( + cm, thread_data->td->mask_buf, + (int32_t *)aom_memalign( + 16, MAX_SB_SQUARE * sizeof(*thread_data->td->mask_buf))); +#endif // Allocate frame counters in thread data. CHECK_MEM_ERROR(cm, thread_data->td->counts, aom_calloc(1, sizeof(*thread_data->td->counts))); +#if CONFIG_PALETTE + // Allocate buffers used by palette coding mode. + if (cpi->common.allow_screen_content_tools) { + CHECK_MEM_ERROR( + cm, thread_data->td->palette_buffer, + aom_memalign(16, sizeof(*thread_data->td->palette_buffer))); + } +#endif // CONFIG_PALETTE + // Create threads if (!winterface->reset(worker)) aom_internal_error(&cm->error, AOM_CODEC_ERROR, @@ -127,6 +155,12 @@ void av1_encode_tiles_mt(AV1_COMP *cpi) { if (thread_data->td != &cpi->td) { thread_data->td->mb = cpi->td.mb; thread_data->td->rd_counts = cpi->td.rd_counts; +#if CONFIG_MOTION_VAR + thread_data->td->mb.above_pred_buf = thread_data->td->above_pred_buf; + thread_data->td->mb.left_pred_buf = thread_data->td->left_pred_buf; + thread_data->td->mb.wsrc_buf = thread_data->td->wsrc_buf; + thread_data->td->mb.mask_buf = thread_data->td->mask_buf; +#endif } if (thread_data->td->counts != &cpi->common.counts) { memcpy(thread_data->td->counts, &cpi->common.counts, @@ -134,12 +168,8 @@ void av1_encode_tiles_mt(AV1_COMP *cpi) { } #if CONFIG_PALETTE - // Allocate buffers used by palette coding mode. - if (cpi->common.allow_screen_content_tools && i < num_workers - 1) { - MACROBLOCK *x = &thread_data->td->mb; - CHECK_MEM_ERROR(cm, x->palette_buffer, - aom_memalign(16, sizeof(*x->palette_buffer))); - } + if (cpi->common.allow_screen_content_tools && i < num_workers - 1) + thread_data->td->mb.palette_buffer = thread_data->td->palette_buffer; #endif // CONFIG_PALETTE } @@ -171,6 +201,9 @@ void av1_encode_tiles_mt(AV1_COMP *cpi) { if (i < cpi->num_workers - 1) { av1_accumulate_frame_counts(&cm->counts, thread_data->td->counts); accumulate_rd_opt(&cpi->td, thread_data->td); +#if CONFIG_VAR_TX + cpi->td.mb.txb_split_count += thread_data->td->mb.txb_split_count; +#endif } } } diff --git a/third_party/aom/av1/encoder/firstpass.c b/third_party/aom/av1/encoder/firstpass.c index e35a54ef2..7a0abba2d 100644 --- a/third_party/aom/av1/encoder/firstpass.c +++ b/third_party/aom/av1/encoder/firstpass.c @@ -568,16 +568,11 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) { od_init_qm(x->daala_enc.state.qm, x->daala_enc.state.qm_inv, x->daala_enc.qm == OD_HVS_QM ? OD_QM8_Q4_HVS : OD_QM8_Q4_FLAT); -#if CONFIG_DAALA_EC +#if !CONFIG_ANS od_ec_enc_init(&x->daala_enc.w.ec, 65025); -#else -#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC." -#endif - -#if CONFIG_DAALA_EC od_ec_enc_reset(&x->daala_enc.w.ec); #else -#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC." +#error "CONFIG_PVQ currently requires !CONFIG_ANS." 
#endif } #endif @@ -598,6 +593,7 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) { av1_init_mv_probs(cm); #if CONFIG_ADAPT_SCAN av1_init_scan_order(cm); + av1_deliver_eob_threshold(cm, xd); #endif av1_convolve_init(cm); #if CONFIG_PVQ @@ -884,7 +880,7 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) { xd->mi[0]->mbmi.tx_size = TX_4X4; xd->mi[0]->mbmi.ref_frame[0] = LAST_FRAME; xd->mi[0]->mbmi.ref_frame[1] = NONE_FRAME; - av1_build_inter_predictors_sby(xd, mb_row * mb_scale, + av1_build_inter_predictors_sby(cm, xd, mb_row * mb_scale, mb_col * mb_scale, NULL, bsize); av1_encode_sby_pass1(cm, x, bsize); sum_mvr += mv.row; @@ -997,10 +993,10 @@ void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) { } #if CONFIG_PVQ -#if CONFIG_DAALA_EC +#if !CONFIG_ANS od_ec_enc_clear(&x->daala_enc.w.ec); #else -#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC." +#error "CONFIG_PVQ currently requires !CONFIG_ANS." #endif x->pvq_q->last_pos = x->pvq_q->curr_pos; @@ -1235,28 +1231,26 @@ static void setup_rf_level_maxq(AV1_COMP *cpi) { } } -void av1_init_subsampling(AV1_COMP *cpi) { - const AV1_COMMON *const cm = &cpi->common; - RATE_CONTROL *const rc = &cpi->rc; - const int w = cm->width; - const int h = cm->height; - int i; - - for (i = 0; i < FRAME_SCALE_STEPS; ++i) { - // Note: Frames with odd-sized dimensions may result from this scaling. - rc->frame_width[i] = (w * 16) / frame_scale_factor[i]; - rc->frame_height[i] = (h * 16) / frame_scale_factor[i]; - } - - setup_rf_level_maxq(cpi); +void av1_calculate_next_scaled_size(const AV1_COMP *cpi, + int *scaled_frame_width, + int *scaled_frame_height) { + *scaled_frame_width = + cpi->oxcf.width * cpi->resize_next_scale_num / cpi->resize_next_scale_den; + *scaled_frame_height = cpi->oxcf.height * cpi->resize_next_scale_num / + cpi->resize_next_scale_den; } -void av1_calculate_coded_size(AV1_COMP *cpi, int *scaled_frame_width, - int *scaled_frame_height) { - RATE_CONTROL *const rc = &cpi->rc; - *scaled_frame_width = rc->frame_width[rc->frame_size_selector]; - *scaled_frame_height = rc->frame_height[rc->frame_size_selector]; +#if CONFIG_FRAME_SUPERRES +void av1_calculate_superres_size(const AV1_COMP *cpi, int *encoded_width, + int *encoded_height) { + *encoded_width = cpi->oxcf.scaled_frame_width * + cpi->common.superres_scale_numerator / + SUPERRES_SCALE_DENOMINATOR; + *encoded_height = cpi->oxcf.scaled_frame_height * + cpi->common.superres_scale_numerator / + SUPERRES_SCALE_DENOMINATOR; } +#endif // CONFIG_FRAME_SUPERRES void av1_init_second_pass(AV1_COMP *cpi) { const AV1EncoderConfig *const oxcf = &cpi->oxcf; @@ -1316,7 +1310,7 @@ void av1_init_second_pass(AV1_COMP *cpi) { twopass->last_kfgroup_zeromotion_pct = 100; if (oxcf->resize_mode != RESIZE_NONE) { - av1_init_subsampling(cpi); + setup_rf_level_maxq(cpi); } } @@ -2300,7 +2294,8 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) { if (oxcf->resize_mode == RESIZE_DYNAMIC) { // Default to starting GF groups at normal frame size. - cpi->rc.next_frame_size_selector = UNSCALED; + // TODO(afergs): Make a function for this + cpi->resize_next_scale_num = cpi->resize_next_scale_den; } } @@ -2646,7 +2641,8 @@ static void find_next_key_frame(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) { if (oxcf->resize_mode == RESIZE_DYNAMIC) { // Default to normal-sized frame on keyframes. 
- cpi->rc.next_frame_size_selector = UNSCALED; + // TODO(afergs): Make a function for this + cpi->resize_next_scale_num = cpi->resize_next_scale_den; } } diff --git a/third_party/aom/av1/encoder/firstpass.h b/third_party/aom/av1/encoder/firstpass.h index db459cc22..43104454c 100644 --- a/third_party/aom/av1/encoder/firstpass.h +++ b/third_party/aom/av1/encoder/firstpass.h @@ -177,10 +177,17 @@ void av1_twopass_postencode_update(struct AV1_COMP *cpi); // Post encode update of the rate control parameters for 2-pass void av1_twopass_postencode_update(struct AV1_COMP *cpi); -void av1_init_subsampling(struct AV1_COMP *cpi); - -void av1_calculate_coded_size(struct AV1_COMP *cpi, int *scaled_frame_width, - int *scaled_frame_height); +void av1_calculate_next_scaled_size(const struct AV1_COMP *cpi, + int *scaled_frame_width, + int *scaled_frame_height); + +#if CONFIG_FRAME_SUPERRES +// This is the size after superress scaling, which could be 1:1. +// Superres scaling happens after regular downscaling. +// TODO(afergs): Limit overall reduction to 1/2 of the original size +void av1_calculate_superres_size(const struct AV1_COMP *cpi, int *encoded_width, + int *encoded_height); +#endif // CONFIG_FRAME_SUPERRES #if CONFIG_EXT_REFS static INLINE int get_number_of_extra_arfs(int interval, int arf_pending) { diff --git a/third_party/aom/av1/encoder/global_motion.c b/third_party/aom/av1/encoder/global_motion.c index 2a6204939..74cbc8ae7 100644 --- a/third_party/aom/av1/encoder/global_motion.c +++ b/third_party/aom/av1/encoder/global_motion.c @@ -124,14 +124,15 @@ static void force_wmtype(WarpedMotionParams *wm, TransformationType wmtype) { wm->wmtype = wmtype; } -double refine_integerized_param(WarpedMotionParams *wm, - TransformationType wmtype, +int64_t refine_integerized_param(WarpedMotionParams *wm, + TransformationType wmtype, #if CONFIG_HIGHBITDEPTH - int use_hbd, int bd, + int use_hbd, int bd, #endif // CONFIG_HIGHBITDEPTH - uint8_t *ref, int r_width, int r_height, - int r_stride, uint8_t *dst, int d_width, - int d_height, int d_stride, int n_refinements) { + uint8_t *ref, int r_width, int r_height, + int r_stride, uint8_t *dst, int d_width, + int d_height, int d_stride, + int n_refinements) { static const int max_trans_model_params[TRANS_TYPES] = { 0, 2, 4, 6, 8, 8, 8 }; @@ -139,22 +140,21 @@ double refine_integerized_param(WarpedMotionParams *wm, int i = 0, p; int n_params = max_trans_model_params[wmtype]; int32_t *param_mat = wm->wmmat; - double step_error; + int64_t step_error, best_error; int32_t step; int32_t *param; int32_t curr_param; int32_t best_param; - double best_error; force_wmtype(wm, wmtype); - best_error = av1_warp_erroradv(wm, + best_error = av1_warp_error(wm, #if CONFIG_HIGHBITDEPTH - use_hbd, bd, + use_hbd, bd, #endif // CONFIG_HIGHBITDEPTH - ref, r_width, r_height, r_stride, - dst + border * d_stride + border, border, - border, d_width - 2 * border, - d_height - 2 * border, d_stride, 0, 0, 16, 16); + ref, r_width, r_height, r_stride, + dst + border * d_stride + border, border, border, + d_width - 2 * border, d_height - 2 * border, + d_stride, 0, 0, 16, 16); step = 1 << (n_refinements + 1); for (i = 0; i < n_refinements; i++, step >>= 1) { for (p = 0; p < n_params; ++p) { @@ -167,7 +167,7 @@ double refine_integerized_param(WarpedMotionParams *wm, best_param = curr_param; // look to the left *param = add_param_offset(p, curr_param, -step); - step_error = av1_warp_erroradv( + step_error = av1_warp_error( wm, #if CONFIG_HIGHBITDEPTH use_hbd, bd, @@ -183,7 +183,7 @@ double 
refine_integerized_param(WarpedMotionParams *wm, // look to the right *param = add_param_offset(p, curr_param, step); - step_error = av1_warp_erroradv( + step_error = av1_warp_error( wm, #if CONFIG_HIGHBITDEPTH use_hbd, bd, @@ -202,7 +202,7 @@ double refine_integerized_param(WarpedMotionParams *wm, // for the biggest step size while (step_dir) { *param = add_param_offset(p, best_param, step * step_dir); - step_error = av1_warp_erroradv( + step_error = av1_warp_error( wm, #if CONFIG_HIGHBITDEPTH use_hbd, bd, diff --git a/third_party/aom/av1/encoder/global_motion.h b/third_party/aom/av1/encoder/global_motion.h index 8fc757f38..38509df6a 100644 --- a/third_party/aom/av1/encoder/global_motion.h +++ b/third_party/aom/av1/encoder/global_motion.h @@ -26,14 +26,17 @@ void convert_model_to_params(const double *params, WarpedMotionParams *model); int is_enough_erroradvantage(double erroradv, int params_cost); -double refine_integerized_param(WarpedMotionParams *wm, - TransformationType wmtype, +// Returns the av1_warp_error between "dst" and the result of applying the +// motion params that result from fine-tuning "wm" to "ref". Note that "wm" is +// modified in place. +int64_t refine_integerized_param(WarpedMotionParams *wm, + TransformationType wmtype, #if CONFIG_HIGHBITDEPTH - int use_hbd, int bd, + int use_hbd, int bd, #endif // CONFIG_HIGHBITDEPTH - uint8_t *ref, int r_width, int r_height, - int r_stride, uint8_t *dst, int d_width, - int d_height, int d_stride, int n_refinements); + uint8_t *ref, int r_width, int r_height, + int r_stride, uint8_t *dst, int d_width, + int d_height, int d_stride, int n_refinements); /* Computes "num_motions" candidate global motion parameters between two frames. diff --git a/third_party/aom/av1/encoder/hybrid_fwd_txfm.c b/third_party/aom/av1/encoder/hybrid_fwd_txfm.c index 4fd563163..c57deed84 100644 --- a/third_party/aom/av1/encoder/hybrid_fwd_txfm.c +++ b/third_party/aom/av1/encoder/hybrid_fwd_txfm.c @@ -16,7 +16,7 @@ #include "av1/common/idct.h" #include "av1/encoder/hybrid_fwd_txfm.h" -#if CONFIG_CB4X4 +#if CONFIG_CHROMA_2X2 static void fwd_txfm_2x2(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, TX_TYPE tx_type, int lossless) { tran_high_t a1 = src_diff[0]; @@ -132,8 +132,38 @@ static void fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff, } #endif // CONFIG_TX64X64 +#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT +static void fwd_txfm_16x4(const int16_t *src_diff, tran_low_t *coeff, + int diff_stride, TX_TYPE tx_type, + FWD_TXFM_OPT fwd_txfm_opt) { + (void)fwd_txfm_opt; + av1_fht16x4(src_diff, coeff, diff_stride, tx_type); +} + +static void fwd_txfm_4x16(const int16_t *src_diff, tran_low_t *coeff, + int diff_stride, TX_TYPE tx_type, + FWD_TXFM_OPT fwd_txfm_opt) { + (void)fwd_txfm_opt; + av1_fht4x16(src_diff, coeff, diff_stride, tx_type); +} + +static void fwd_txfm_32x8(const int16_t *src_diff, tran_low_t *coeff, + int diff_stride, TX_TYPE tx_type, + FWD_TXFM_OPT fwd_txfm_opt) { + (void)fwd_txfm_opt; + av1_fht32x8(src_diff, coeff, diff_stride, tx_type); +} + +static void fwd_txfm_8x32(const int16_t *src_diff, tran_low_t *coeff, + int diff_stride, TX_TYPE tx_type, + FWD_TXFM_OPT fwd_txfm_opt) { + (void)fwd_txfm_opt; + av1_fht8x32(src_diff, coeff, diff_stride, tx_type); +} +#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT + #if CONFIG_HIGHBITDEPTH -#if CONFIG_CB4X4 +#if CONFIG_CHROMA_2X2 static void highbd_fwd_txfm_2x2(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, TX_TYPE tx_type, int lossless, const 
int bd) { @@ -425,11 +455,25 @@ void av1_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, case TX_4X4: fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, lossless); break; -#if CONFIG_CB4X4 +#if CONFIG_CHROMA_2X2 case TX_2X2: fwd_txfm_2x2(src_diff, coeff, diff_stride, tx_type, lossless); break; #endif +#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT + case TX_4X16: + fwd_txfm_4x16(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt); + break; + case TX_16X4: + fwd_txfm_16x4(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt); + break; + case TX_8X32: + fwd_txfm_8x32(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt); + break; + case TX_32X8: + fwd_txfm_32x8(src_diff, coeff, diff_stride, tx_type, fwd_txfm_opt); + break; +#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT default: assert(0); break; } } @@ -488,7 +532,7 @@ void av1_highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, case TX_4X4: highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, tx_type, lossless, bd); break; -#if CONFIG_CB4X4 +#if CONFIG_CHROMA_2X2 case TX_2X2: highbd_fwd_txfm_2x2(src_diff, coeff, diff_stride, tx_type, lossless, bd); break; diff --git a/third_party/aom/av1/encoder/mathutils.h b/third_party/aom/av1/encoder/mathutils.h new file mode 100644 index 000000000..23243dd9e --- /dev/null +++ b/third_party/aom/av1/encoder/mathutils.h @@ -0,0 +1,354 @@ +/* + * Copyright (c) 2017, Alliance for Open Media. All rights reserved + * + * This source code is subject to the terms of the BSD 2 Clause License and + * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License + * was not distributed with this source code in the LICENSE file, you can + * obtain it at www.aomedia.org/license/software. If the Alliance for Open + * Media Patent License 1.0 was not distributed with this source code in the + * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
+ */ + +#include <memory.h> +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> + +static const double TINY_NEAR_ZERO = 1.0E-16; + +// Solves Ax = b, where x and b are column vectors of size nx1 and A is nxn +static INLINE int linsolve(int n, double *A, int stride, double *b, double *x) { + int i, j, k; + double c; + // Forward elimination + for (k = 0; k < n - 1; k++) { + // Bring the largest magitude to the diagonal position + for (i = n - 1; i > k; i--) { + if (fabs(A[(i - 1) * stride + k]) < fabs(A[i * stride + k])) { + for (j = 0; j < n; j++) { + c = A[i * stride + j]; + A[i * stride + j] = A[(i - 1) * stride + j]; + A[(i - 1) * stride + j] = c; + } + c = b[i]; + b[i] = b[i - 1]; + b[i - 1] = c; + } + } + for (i = k; i < n - 1; i++) { + if (fabs(A[k * stride + k]) < TINY_NEAR_ZERO) return 0; + c = A[(i + 1) * stride + k] / A[k * stride + k]; + for (j = 0; j < n; j++) A[(i + 1) * stride + j] -= c * A[k * stride + j]; + b[i + 1] -= c * b[k]; + } + } + // Backward substitution + for (i = n - 1; i >= 0; i--) { + if (fabs(A[i * stride + i]) < TINY_NEAR_ZERO) return 0; + c = 0; + for (j = i + 1; j <= n - 1; j++) c += A[i * stride + j] * x[j]; + x[i] = (b[i] - c) / A[i * stride + i]; + } + + return 1; +} + +//////////////////////////////////////////////////////////////////////////////// +// Least-squares +// Solves for n-dim x in a least squares sense to minimize |Ax - b|^2 +// The solution is simply x = (A'A)^-1 A'b or simply the solution for +// the system: A'A x = A'b +static INLINE int least_squares(int n, double *A, int rows, int stride, + double *b, double *scratch, double *x) { + int i, j, k; + double *scratch_ = NULL; + double *AtA, *Atb; + if (!scratch) { + scratch_ = (double *)aom_malloc(sizeof(*scratch) * n * (n + 1)); + scratch = scratch_; + } + AtA = scratch; + Atb = scratch + n * n; + + for (i = 0; i < n; ++i) { + for (j = i; j < n; ++j) { + AtA[i * n + j] = 0.0; + for (k = 0; k < rows; ++k) + AtA[i * n + j] += A[k * stride + i] * A[k * stride + j]; + AtA[j * n + i] = AtA[i * n + j]; + } + Atb[i] = 0; + for (k = 0; k < rows; ++k) Atb[i] += A[k * stride + i] * b[k]; + } + int ret = linsolve(n, AtA, n, Atb, x); + if (scratch_) aom_free(scratch_); + return ret; +} + +// Matrix multiply +static INLINE void multiply_mat(const double *m1, const double *m2, double *res, + const int m1_rows, const int inner_dim, + const int m2_cols) { + double sum; + + int row, col, inner; + for (row = 0; row < m1_rows; ++row) { + for (col = 0; col < m2_cols; ++col) { + sum = 0; + for (inner = 0; inner < inner_dim; ++inner) + sum += m1[row * inner_dim + inner] * m2[inner * m2_cols + col]; + *(res++) = sum; + } + } +} + +// +// The functions below are needed only for homography computation +// Remove if the homography models are not used. +// +/////////////////////////////////////////////////////////////////////////////// +// svdcmp +// Adopted from Numerical Recipes in C + +static INLINE double sign(double a, double b) { + return ((b) >= 0 ? fabs(a) : -fabs(a)); +} + +static INLINE double pythag(double a, double b) { + double ct; + const double absa = fabs(a); + const double absb = fabs(b); + + if (absa > absb) { + ct = absb / absa; + return absa * sqrt(1.0 + ct * ct); + } else { + ct = absa / absb; + return (absb == 0) ? 
0 : absb * sqrt(1.0 + ct * ct); + } +} + +static INLINE int svdcmp(double **u, int m, int n, double w[], double **v) { + const int max_its = 30; + int flag, i, its, j, jj, k, l, nm; + double anorm, c, f, g, h, s, scale, x, y, z; + double *rv1 = (double *)aom_malloc(sizeof(*rv1) * (n + 1)); + g = scale = anorm = 0.0; + for (i = 0; i < n; i++) { + l = i + 1; + rv1[i] = scale * g; + g = s = scale = 0.0; + if (i < m) { + for (k = i; k < m; k++) scale += fabs(u[k][i]); + if (scale != 0.) { + for (k = i; k < m; k++) { + u[k][i] /= scale; + s += u[k][i] * u[k][i]; + } + f = u[i][i]; + g = -sign(sqrt(s), f); + h = f * g - s; + u[i][i] = f - g; + for (j = l; j < n; j++) { + for (s = 0.0, k = i; k < m; k++) s += u[k][i] * u[k][j]; + f = s / h; + for (k = i; k < m; k++) u[k][j] += f * u[k][i]; + } + for (k = i; k < m; k++) u[k][i] *= scale; + } + } + w[i] = scale * g; + g = s = scale = 0.0; + if (i < m && i != n - 1) { + for (k = l; k < n; k++) scale += fabs(u[i][k]); + if (scale != 0.) { + for (k = l; k < n; k++) { + u[i][k] /= scale; + s += u[i][k] * u[i][k]; + } + f = u[i][l]; + g = -sign(sqrt(s), f); + h = f * g - s; + u[i][l] = f - g; + for (k = l; k < n; k++) rv1[k] = u[i][k] / h; + for (j = l; j < m; j++) { + for (s = 0.0, k = l; k < n; k++) s += u[j][k] * u[i][k]; + for (k = l; k < n; k++) u[j][k] += s * rv1[k]; + } + for (k = l; k < n; k++) u[i][k] *= scale; + } + } + anorm = fmax(anorm, (fabs(w[i]) + fabs(rv1[i]))); + } + + for (i = n - 1; i >= 0; i--) { + if (i < n - 1) { + if (g != 0.) { + for (j = l; j < n; j++) v[j][i] = (u[i][j] / u[i][l]) / g; + for (j = l; j < n; j++) { + for (s = 0.0, k = l; k < n; k++) s += u[i][k] * v[k][j]; + for (k = l; k < n; k++) v[k][j] += s * v[k][i]; + } + } + for (j = l; j < n; j++) v[i][j] = v[j][i] = 0.0; + } + v[i][i] = 1.0; + g = rv1[i]; + l = i; + } + for (i = AOMMIN(m, n) - 1; i >= 0; i--) { + l = i + 1; + g = w[i]; + for (j = l; j < n; j++) u[i][j] = 0.0; + if (g != 0.) 
{ + g = 1.0 / g; + for (j = l; j < n; j++) { + for (s = 0.0, k = l; k < m; k++) s += u[k][i] * u[k][j]; + f = (s / u[i][i]) * g; + for (k = i; k < m; k++) u[k][j] += f * u[k][i]; + } + for (j = i; j < m; j++) u[j][i] *= g; + } else { + for (j = i; j < m; j++) u[j][i] = 0.0; + } + ++u[i][i]; + } + for (k = n - 1; k >= 0; k--) { + for (its = 0; its < max_its; its++) { + flag = 1; + for (l = k; l >= 0; l--) { + nm = l - 1; + if ((double)(fabs(rv1[l]) + anorm) == anorm || nm < 0) { + flag = 0; + break; + } + if ((double)(fabs(w[nm]) + anorm) == anorm) break; + } + if (flag) { + c = 0.0; + s = 1.0; + for (i = l; i <= k; i++) { + f = s * rv1[i]; + rv1[i] = c * rv1[i]; + if ((double)(fabs(f) + anorm) == anorm) break; + g = w[i]; + h = pythag(f, g); + w[i] = h; + h = 1.0 / h; + c = g * h; + s = -f * h; + for (j = 0; j < m; j++) { + y = u[j][nm]; + z = u[j][i]; + u[j][nm] = y * c + z * s; + u[j][i] = z * c - y * s; + } + } + } + z = w[k]; + if (l == k) { + if (z < 0.0) { + w[k] = -z; + for (j = 0; j < n; j++) v[j][k] = -v[j][k]; + } + break; + } + if (its == max_its - 1) { + aom_free(rv1); + return 1; + } + assert(k > 0); + x = w[l]; + nm = k - 1; + y = w[nm]; + g = rv1[nm]; + h = rv1[k]; + f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y); + g = pythag(f, 1.0); + f = ((x - z) * (x + z) + h * ((y / (f + sign(g, f))) - h)) / x; + c = s = 1.0; + for (j = l; j <= nm; j++) { + i = j + 1; + g = rv1[i]; + y = w[i]; + h = s * g; + g = c * g; + z = pythag(f, h); + rv1[j] = z; + c = f / z; + s = h / z; + f = x * c + g * s; + g = g * c - x * s; + h = y * s; + y *= c; + for (jj = 0; jj < n; jj++) { + x = v[jj][j]; + z = v[jj][i]; + v[jj][j] = x * c + z * s; + v[jj][i] = z * c - x * s; + } + z = pythag(f, h); + w[j] = z; + if (z != 0.) { + z = 1.0 / z; + c = f * z; + s = h * z; + } + f = c * g + s * y; + x = c * y - s * g; + for (jj = 0; jj < m; jj++) { + y = u[jj][j]; + z = u[jj][i]; + u[jj][j] = y * c + z * s; + u[jj][i] = z * c - y * s; + } + } + rv1[l] = 0.0; + rv1[k] = f; + w[k] = x; + } + } + aom_free(rv1); + return 0; +} + +static INLINE int SVD(double *U, double *W, double *V, double *matx, int M, + int N) { + // Assumes allocation for U is MxN + double **nrU = (double **)aom_malloc((M) * sizeof(*nrU)); + double **nrV = (double **)aom_malloc((N) * sizeof(*nrV)); + int problem, i; + + problem = !(nrU && nrV); + if (!problem) { + for (i = 0; i < M; i++) { + nrU[i] = &U[i * N]; + } + for (i = 0; i < N; i++) { + nrV[i] = &V[i * N]; + } + } else { + if (nrU) aom_free(nrU); + if (nrV) aom_free(nrV); + return 1; + } + + /* copy from given matx into nrU */ + for (i = 0; i < M; i++) { + memcpy(&(nrU[i][0]), matx + N * i, N * sizeof(*matx)); + } + + /* HERE IT IS: do SVD */ + if (svdcmp(nrU, M, N, W, nrV)) { + aom_free(nrU); + aom_free(nrV); + return 1; + } + + /* aom_free Numerical Recipes arrays */ + aom_free(nrU); + aom_free(nrV); + + return 0; +} diff --git a/third_party/aom/av1/encoder/mbgraph.c b/third_party/aom/av1/encoder/mbgraph.c index 1296027dc..3f5daebcc 100644 --- a/third_party/aom/av1/encoder/mbgraph.c +++ b/third_party/aom/av1/encoder/mbgraph.c @@ -52,11 +52,14 @@ static unsigned int do_16x16_motion_iteration(AV1_COMP *cpi, const MV *ref_mv, { int distortion; unsigned int sse; - cpi->find_fractional_mv_step(x, ref_mv, cpi->common.allow_high_precision_mv, - x->errorperbit, &v_fn_ptr, 0, - mv_sf->subpel_iters_per_step, - cond_cost_list(cpi, cost_list), NULL, NULL, - &distortion, &sse, NULL, 0, 0, 0); + cpi->find_fractional_mv_step( + x, ref_mv, cpi->common.allow_high_precision_mv, 
x->errorperbit, + &v_fn_ptr, 0, mv_sf->subpel_iters_per_step, + cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, +#if CONFIG_EXT_INTER + NULL, 0, 0, +#endif + 0, 0, 0); } #if CONFIG_EXT_INTER @@ -71,7 +74,8 @@ static unsigned int do_16x16_motion_iteration(AV1_COMP *cpi, const MV *ref_mv, xd->mi[0]->mbmi.ref_frame[1] = NONE_FRAME; #endif // CONFIG_EXT_INTER - av1_build_inter_predictors_sby(xd, mb_row, mb_col, NULL, BLOCK_16X16); + av1_build_inter_predictors_sby(&cpi->common, xd, mb_row, mb_col, NULL, + BLOCK_16X16); /* restore UMV window */ x->mv_limits = tmp_mv_limits; diff --git a/third_party/aom/av1/encoder/mcomp.c b/third_party/aom/av1/encoder/mcomp.c index d069eefb0..52080ca0d 100644 --- a/third_party/aom/av1/encoder/mcomp.c +++ b/third_party/aom/av1/encoder/mcomp.c @@ -110,7 +110,7 @@ static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref, int sad_per_bit) { const MV diff = { (mv->row - ref->row) * 8, (mv->col - ref->col) * 8 }; return ROUND_POWER_OF_TWO( - (unsigned)mv_cost(&diff, x->nmvjointsadcost, x->mvsadcost) * sad_per_bit, + (unsigned)mv_cost(&diff, x->nmvjointcost, x->mvcost) * sad_per_bit, AV1_PROB_COST_SHIFT); } @@ -176,6 +176,7 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { } /* checks if (r, c) has better score than previous best */ +#if CONFIG_EXT_INTER #define CHECK_BETTER(v, r, c) \ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ MV this_mv = { r, c }; \ @@ -183,6 +184,10 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { if (second_pred == NULL) \ thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \ src_address, src_stride, &sse); \ + else if (mask) \ + thismse = vfp->msvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \ + src_address, src_stride, second_pred, mask, \ + mask_stride, invert_mask, &sse); \ else \ thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \ src_address, src_stride, &sse, second_pred); \ @@ -197,6 +202,29 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { } else { \ v = INT_MAX; \ } +#else +#define CHECK_BETTER(v, r, c) \ + if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ + MV this_mv = { r, c }; \ + v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \ + if (second_pred == NULL) \ + thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \ + src_address, src_stride, &sse); \ + else \ + thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \ + src_address, src_stride, &sse, second_pred); \ + v += thismse; \ + if (v < besterr) { \ + besterr = v; \ + br = r; \ + bc = c; \ + *distortion = thismse; \ + *sse1 = sse; \ + } \ + } else { \ + v = INT_MAX; \ + } +#endif // CONFIG_EXT_INTER #define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c) @@ -206,6 +234,26 @@ static INLINE const uint8_t *upre(const uint8_t *buf, int stride, int r, } /* checks if (r, c) has better score than previous best */ +#if CONFIG_EXT_INTER +#define CHECK_BETTER1(v, r, c) \ + if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ + MV this_mv = { r, c }; \ + thismse = upsampled_pref_error( \ + xd, vfp, src_address, src_stride, upre(y, y_stride, r, c), y_stride, \ + second_pred, mask, mask_stride, invert_mask, w, h, &sse); \ + v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \ + v += thismse; \ + if (v < besterr) { \ + besterr = v; \ + br = r; \ + bc = c; \ + *distortion = thismse; \ + *sse1 = sse; \ + } \ + } else { \ + v = 
INT_MAX; \ + } +#else #define CHECK_BETTER1(v, r, c) \ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ MV this_mv = { r, c }; \ @@ -224,6 +272,7 @@ static INLINE const uint8_t *upre(const uint8_t *buf, int stride, int r, } else { \ v = INT_MAX; \ } +#endif // CONFIG_EXT_INTER #define FIRST_LEVEL_CHECKS \ { \ @@ -327,20 +376,36 @@ static unsigned int setup_center_error( const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv, int error_per_bit, const aom_variance_fn_ptr_t *vfp, const uint8_t *const src, const int src_stride, const uint8_t *const y, - int y_stride, const uint8_t *second_pred, int w, int h, int offset, - int *mvjcost, int *mvcost[2], unsigned int *sse1, int *distortion) { + int y_stride, const uint8_t *second_pred, +#if CONFIG_EXT_INTER + const uint8_t *mask, int mask_stride, int invert_mask, +#endif + int w, int h, int offset, int *mvjcost, int *mvcost[2], unsigned int *sse1, + int *distortion) { unsigned int besterr; #if CONFIG_HIGHBITDEPTH if (second_pred != NULL) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { DECLARE_ALIGNED(16, uint16_t, comp_pred16[MAX_SB_SQUARE]); - aom_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset, - y_stride); +#if CONFIG_EXT_INTER + if (mask) + aom_highbd_comp_mask_pred(comp_pred16, second_pred, w, h, y + offset, + y_stride, mask, mask_stride, invert_mask); + else +#endif + aom_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset, + y_stride); besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1); } else { DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]); - aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); +#if CONFIG_EXT_INTER + if (mask) + aom_comp_mask_pred(comp_pred, second_pred, w, h, y + offset, y_stride, + mask, mask_stride, invert_mask); + else +#endif + aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); } } else { @@ -352,7 +417,13 @@ static unsigned int setup_center_error( (void)xd; if (second_pred != NULL) { DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]); - aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); +#if CONFIG_EXT_INTER + if (mask) + aom_comp_mask_pred(comp_pred, second_pred, w, h, y + offset, y_stride, + mask, mask_stride, invert_mask); + else +#endif + aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); } else { besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1); @@ -391,12 +462,19 @@ int av1_find_best_sub_pixel_tree_pruned_evenmore( MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit, const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], int *distortion, - unsigned int *sse1, const uint8_t *second_pred, int w, int h, - int use_upsampled_ref) { + unsigned int *sse1, const uint8_t *second_pred, +#if CONFIG_EXT_INTER + const uint8_t *mask, int mask_stride, int invert_mask, +#endif + int w, int h, int use_upsampled_ref) { SETUP_SUBPEL_SEARCH; - besterr = setup_center_error( - xd, bestmv, ref_mv, error_per_bit, vfp, src_address, src_stride, y, - y_stride, second_pred, w, h, offset, mvjcost, mvcost, sse1, distortion); + besterr = + setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, src_address, + src_stride, y, y_stride, second_pred, +#if CONFIG_EXT_INTER + mask, mask_stride, invert_mask, +#endif + w, h, offset, mvjcost, mvcost, sse1, distortion); (void)halfiters; 
(void)quarteriters; (void)eighthiters; @@ -457,14 +535,21 @@ int av1_find_best_sub_pixel_tree_pruned_more( MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit, const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], int *distortion, - unsigned int *sse1, const uint8_t *second_pred, int w, int h, - int use_upsampled_ref) { + unsigned int *sse1, const uint8_t *second_pred, +#if CONFIG_EXT_INTER + const uint8_t *mask, int mask_stride, int invert_mask, +#endif + int w, int h, int use_upsampled_ref) { SETUP_SUBPEL_SEARCH; (void)use_upsampled_ref; - besterr = setup_center_error( - xd, bestmv, ref_mv, error_per_bit, vfp, src_address, src_stride, y, - y_stride, second_pred, w, h, offset, mvjcost, mvcost, sse1, distortion); + besterr = + setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, src_address, + src_stride, y, y_stride, second_pred, +#if CONFIG_EXT_INTER + mask, mask_stride, invert_mask, +#endif + w, h, offset, mvjcost, mvcost, sse1, distortion); if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) { @@ -519,14 +604,21 @@ int av1_find_best_sub_pixel_tree_pruned( MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit, const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], int *distortion, - unsigned int *sse1, const uint8_t *second_pred, int w, int h, - int use_upsampled_ref) { + unsigned int *sse1, const uint8_t *second_pred, +#if CONFIG_EXT_INTER + const uint8_t *mask, int mask_stride, int invert_mask, +#endif + int w, int h, int use_upsampled_ref) { SETUP_SUBPEL_SEARCH; (void)use_upsampled_ref; - besterr = setup_center_error( - xd, bestmv, ref_mv, error_per_bit, vfp, src_address, src_stride, y, - y_stride, second_pred, w, h, offset, mvjcost, mvcost, sse1, distortion); + besterr = + setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, src_address, + src_stride, y, y_stride, second_pred, +#if CONFIG_EXT_INTER + mask, mask_stride, invert_mask, +#endif + w, h, offset, mvjcost, mvcost, sse1, distortion); if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && cost_list[4] != INT_MAX) { @@ -612,17 +704,29 @@ static int upsampled_pref_error(const MACROBLOCKD *xd, const aom_variance_fn_ptr_t *vfp, const uint8_t *const src, const int src_stride, const uint8_t *const y, int y_stride, - const uint8_t *second_pred, int w, int h, - unsigned int *sse) { + const uint8_t *second_pred, +#if CONFIG_EXT_INTER + const uint8_t *mask, int mask_stride, + int invert_mask, +#endif + int w, int h, unsigned int *sse) { unsigned int besterr; #if CONFIG_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]); - if (second_pred != NULL) - aom_highbd_comp_avg_upsampled_pred(pred16, second_pred, w, h, y, - y_stride); - else + if (second_pred != NULL) { +#if CONFIG_EXT_INTER + if (mask) + aom_highbd_comp_mask_upsampled_pred(pred16, second_pred, w, h, y, + y_stride, mask, mask_stride, + invert_mask); + else +#endif + aom_highbd_comp_avg_upsampled_pred(pred16, second_pred, w, h, y, + y_stride); + } else { aom_highbd_upsampled_pred(pred16, w, h, y, y_stride); + } besterr = vfp->vf(CONVERT_TO_BYTEPTR(pred16), w, src, src_stride, sse); } else { @@ -631,10 +735,17 @@ static int upsampled_pref_error(const 
MACROBLOCKD *xd, DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]); (void)xd; #endif // CONFIG_HIGHBITDEPTH - if (second_pred != NULL) - aom_comp_avg_upsampled_pred(pred, second_pred, w, h, y, y_stride); - else + if (second_pred != NULL) { +#if CONFIG_EXT_INTER + if (mask) + aom_comp_mask_upsampled_pred(pred, second_pred, w, h, y, y_stride, mask, + mask_stride, invert_mask); + else +#endif + aom_comp_avg_upsampled_pred(pred, second_pred, w, h, y, y_stride); + } else { aom_upsampled_pred(pred, w, h, y, y_stride); + } besterr = vfp->vf(pred, w, src, src_stride, sse); #if CONFIG_HIGHBITDEPTH @@ -647,23 +758,32 @@ static unsigned int upsampled_setup_center_error( const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv, int error_per_bit, const aom_variance_fn_ptr_t *vfp, const uint8_t *const src, const int src_stride, const uint8_t *const y, - int y_stride, const uint8_t *second_pred, int w, int h, int offset, - int *mvjcost, int *mvcost[2], unsigned int *sse1, int *distortion) { - unsigned int besterr = upsampled_pref_error( - xd, vfp, src, src_stride, y + offset, y_stride, second_pred, w, h, sse1); + int y_stride, const uint8_t *second_pred, +#if CONFIG_EXT_INTER + const uint8_t *mask, int mask_stride, int invert_mask, +#endif + int w, int h, int offset, int *mvjcost, int *mvcost[2], unsigned int *sse1, + int *distortion) { + unsigned int besterr = upsampled_pref_error(xd, vfp, src, src_stride, + y + offset, y_stride, second_pred, +#if CONFIG_EXT_INTER + mask, mask_stride, invert_mask, +#endif + w, h, sse1); *distortion = besterr; besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); return besterr; } -int av1_find_best_sub_pixel_tree(MACROBLOCK *x, const MV *ref_mv, int allow_hp, - int error_per_bit, - const aom_variance_fn_ptr_t *vfp, - int forced_stop, int iters_per_step, - int *cost_list, int *mvjcost, int *mvcost[2], - int *distortion, unsigned int *sse1, - const uint8_t *second_pred, int w, int h, - int use_upsampled_ref) { +int av1_find_best_sub_pixel_tree( + MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit, + const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, + int *cost_list, int *mvjcost, int *mvcost[2], int *distortion, + unsigned int *sse1, const uint8_t *second_pred, +#if CONFIG_EXT_INTER + const uint8_t *mask, int mask_stride, int invert_mask, +#endif + int w, int h, int use_upsampled_ref) { const uint8_t *const src_address = x->plane[0].src.buf; const int src_stride = x->plane[0].src.stride; const MACROBLOCKD *xd = &x->e_mbd; @@ -700,12 +820,19 @@ int av1_find_best_sub_pixel_tree(MACROBLOCK *x, const MV *ref_mv, int allow_hp, if (use_upsampled_ref) besterr = upsampled_setup_center_error( xd, bestmv, ref_mv, error_per_bit, vfp, src_address, src_stride, y, - y_stride, second_pred, w, h, (offset * 8), mvjcost, mvcost, sse1, - distortion); + y_stride, second_pred, +#if CONFIG_EXT_INTER + mask, mask_stride, invert_mask, +#endif + w, h, (offset * 8), mvjcost, mvcost, sse1, distortion); else - besterr = setup_center_error( - xd, bestmv, ref_mv, error_per_bit, vfp, src_address, src_stride, y, - y_stride, second_pred, w, h, offset, mvjcost, mvcost, sse1, distortion); + besterr = + setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, src_address, + src_stride, y, y_stride, second_pred, +#if CONFIG_EXT_INTER + mask, mask_stride, invert_mask, +#endif + w, h, offset, mvjcost, mvcost, sse1, distortion); (void)cost_list; // to silence compiler warning @@ -721,14 +848,23 @@ int av1_find_best_sub_pixel_tree(MACROBLOCK *x, const MV 
*ref_mv, int allow_hp, const uint8_t *const pre_address = y + tr * y_stride + tc; thismse = upsampled_pref_error(xd, vfp, src_address, src_stride, - pre_address, y_stride, second_pred, w, - h, &sse); + pre_address, y_stride, second_pred, +#if CONFIG_EXT_INTER + mask, mask_stride, invert_mask, +#endif + w, h, &sse); } else { const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); if (second_pred == NULL) thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address, src_stride, &sse); +#if CONFIG_EXT_INTER + else if (mask) + thismse = vfp->msvf(pre_address, y_stride, sp(tc), sp(tr), + src_address, src_stride, second_pred, mask, + mask_stride, invert_mask, &sse); +#endif else thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), src_address, src_stride, &sse, second_pred); @@ -760,15 +896,24 @@ int av1_find_best_sub_pixel_tree(MACROBLOCK *x, const MV *ref_mv, int allow_hp, if (use_upsampled_ref) { const uint8_t *const pre_address = y + tr * y_stride + tc; - thismse = - upsampled_pref_error(xd, vfp, src_address, src_stride, pre_address, - y_stride, second_pred, w, h, &sse); + thismse = upsampled_pref_error(xd, vfp, src_address, src_stride, + pre_address, y_stride, second_pred, +#if CONFIG_EXT_INTER + mask, mask_stride, invert_mask, +#endif + w, h, &sse); } else { const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); if (second_pred == NULL) thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address, src_stride, &sse); +#if CONFIG_EXT_INTER + else if (mask) + thismse = vfp->msvf(pre_address, y_stride, sp(tc), sp(tr), + src_address, src_stride, second_pred, mask, + mask_stride, invert_mask, &sse); +#endif else thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), src_address, src_stride, &sse, second_pred); @@ -822,6 +967,102 @@ int av1_find_best_sub_pixel_tree(MACROBLOCK *x, const MV *ref_mv, int allow_hp, #undef PRE #undef CHECK_BETTER +#if CONFIG_WARPED_MOTION +unsigned int av1_compute_motion_cost(const AV1_COMP *cpi, MACROBLOCK *const x, + BLOCK_SIZE bsize, int mi_row, int mi_col, + const MV *this_mv) { + const AV1_COMMON *const cm = &cpi->common; + MACROBLOCKD *xd = &x->e_mbd; + MODE_INFO *mi = xd->mi[0]; + MB_MODE_INFO *mbmi = &mi->mbmi; + const uint8_t *const src = x->plane[0].src.buf; + const int src_stride = x->plane[0].src.stride; + uint8_t *const dst = xd->plane[0].dst.buf; + const int dst_stride = xd->plane[0].dst.stride; + const aom_variance_fn_ptr_t *vfp = &cpi->fn_ptr[bsize]; + const MV ref_mv = x->mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_mv; + unsigned int mse; + unsigned int sse; + + av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, NULL, bsize); + mse = vfp->vf(dst, dst_stride, src, src_stride, &sse); + mse += + mv_err_cost(this_mv, &ref_mv, x->nmvjointcost, x->mvcost, x->errorperbit); + return mse; +} + +// Refine MV in a small range +unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x, + BLOCK_SIZE bsize, int mi_row, int mi_col, + int *pts, int *pts_inref) { + const AV1_COMMON *const cm = &cpi->common; + MACROBLOCKD *xd = &x->e_mbd; + MODE_INFO *mi = xd->mi[0]; + MB_MODE_INFO *mbmi = &mi->mbmi; + const MV neighbors[8] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 }, + { 0, -2 }, { 2, 0 }, { 0, 2 }, { -2, 0 } }; + const MV ref_mv = x->mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_mv; + int16_t br = mbmi->mv[0].as_mv.row; + int16_t bc = mbmi->mv[0].as_mv.col; + int16_t *tr = &mbmi->mv[0].as_mv.row; + int16_t *tc = &mbmi->mv[0].as_mv.col; + WarpedMotionParams best_wm_params = 
mbmi->wm_params[0]; + unsigned int bestmse; + int minc, maxc, minr, maxr; + const int start = cm->allow_high_precision_mv ? 0 : 4; + int ite; + + av1_set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr, + &ref_mv); + + // Calculate the center position's error + assert(bc >= minc && bc <= maxc && br >= minr && br <= maxr); + bestmse = av1_compute_motion_cost(cpi, x, bsize, mi_row, mi_col, + &mbmi->mv[0].as_mv); + + // MV search + for (ite = 0; ite < 2; ++ite) { + int best_idx = -1; + int idx; + + for (idx = start; idx < start + 4; ++idx) { + unsigned int thismse; + + *tr = br + neighbors[idx].row; + *tc = bc + neighbors[idx].col; + + if (*tc >= minc && *tc <= maxc && *tr >= minr && *tr <= maxr) { + MV this_mv = { *tr, *tc }; + if (!find_projection(mbmi->num_proj_ref[0], pts, pts_inref, bsize, *tr, + *tc, &mbmi->wm_params[0], mi_row, mi_col)) { + thismse = + av1_compute_motion_cost(cpi, x, bsize, mi_row, mi_col, &this_mv); + + if (thismse < bestmse) { + best_idx = idx; + best_wm_params = mbmi->wm_params[0]; + bestmse = thismse; + } + } + } + } + + if (best_idx == -1) break; + + if (best_idx >= 0) { + br += neighbors[best_idx].row; + bc += neighbors[best_idx].col; + } + } + + *tr = br; + *tc = bc; + mbmi->wm_params[0] = best_wm_params; + + return bestmse; +} +#endif // CONFIG_WARPED_MOTION + static INLINE int check_bounds(const MvLimits *mv_limits, int row, int col, int range) { return ((row - range) >= mv_limits->row_min) & @@ -1232,6 +1473,27 @@ int av1_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv, : 0); } +#if CONFIG_EXT_INTER +int av1_get_mvpred_mask_var(const MACROBLOCK *x, const MV *best_mv, + const MV *center_mv, const uint8_t *second_pred, + const uint8_t *mask, int mask_stride, + int invert_mask, const aom_variance_fn_ptr_t *vfp, + int use_mvcost) { + const MACROBLOCKD *const xd = &x->e_mbd; + const struct buf_2d *const what = &x->plane[0].src; + const struct buf_2d *const in_what = &xd->plane[0].pre[0]; + const MV mv = { best_mv->row * 8, best_mv->col * 8 }; + unsigned int unused; + + return vfp->msvf(what->buf, what->stride, 0, 0, + get_buf_from_mv(in_what, best_mv), in_what->stride, + second_pred, mask, mask_stride, invert_mask, &unused) + + (use_mvcost ? 
mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost, + x->errorperbit) + : 0); +} +#endif + int av1_hex_search(MACROBLOCK *x, MV *start_mv, int search_param, int sad_per_bit, int do_init_search, int *cost_list, const aom_variance_fn_ptr_t *vfp, int use_mvcost, @@ -1685,17 +1947,12 @@ unsigned int av1_int_pro_motion_estimation(const AV1_COMP *cpi, MACROBLOCK *x, DECLARE_ALIGNED(16, int16_t, src_hbuf[MAX_SB_SQUARE]); DECLARE_ALIGNED(16, int16_t, src_vbuf[MAX_SB_SQUARE]); int idx; - const int bw = 4 << b_width_log2_lookup[bsize]; - const int bh = 4 << b_height_log2_lookup[bsize]; - const int search_width = bw << 1; - const int search_height = bh << 1; const int src_stride = x->plane[0].src.stride; const int ref_stride = xd->plane[0].pre[0].stride; uint8_t const *ref_buf, *src_buf; MV *tmp_mv = &xd->mi[0]->mbmi.mv[0].as_mv; unsigned int best_sad, tmp_sad, sad_arr[4]; MV this_mv; - const int norm_factor = 3 + (bw >> 5); const YV12_BUFFER_CONFIG *scaled_ref_frame = av1_get_scaled_ref_frame(cpi, mbmi->ref_frame[0]); @@ -1724,6 +1981,12 @@ unsigned int av1_int_pro_motion_estimation(const AV1_COMP *cpi, MACROBLOCK *x, } #endif + const int bw = 4 << b_width_log2_lookup[bsize]; + const int bh = 4 << b_height_log2_lookup[bsize]; + const int search_width = bw << 1; + const int search_height = bh << 1; + const int norm_factor = 3 + (bw >> 5); + // Set up prediction 1-D reference set ref_buf = xd->plane[0].pre[0].buf - (bw >> 1); for (idx = 0; idx < search_width; idx += 16) { @@ -2195,9 +2458,13 @@ int av1_refining_search_sad(MACROBLOCK *x, MV *ref_mv, int error_per_bit, } // This function is called when we do joint motion search in comp_inter_inter -// mode. +// mode, or when searching for one component of an ext-inter compound mode. int av1_refining_search_8p_c(MACROBLOCK *x, int error_per_bit, int search_range, const aom_variance_fn_ptr_t *fn_ptr, +#if CONFIG_EXT_INTER + const uint8_t *mask, int mask_stride, + int invert_mask, +#endif const MV *center_mv, const uint8_t *second_pred) { const MV neighbors[8] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 }, { -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 } }; @@ -2211,10 +2478,18 @@ int av1_refining_search_8p_c(MACROBLOCK *x, int error_per_bit, int search_range, clamp_mv(best_mv, x->mv_limits.col_min, x->mv_limits.col_max, x->mv_limits.row_min, x->mv_limits.row_max); - best_sad = - fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv), - in_what->stride, second_pred) + - mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit); +#if CONFIG_EXT_INTER + if (mask) + best_sad = fn_ptr->msdf(what->buf, what->stride, + get_buf_from_mv(in_what, best_mv), in_what->stride, + second_pred, mask, mask_stride, invert_mask) + + mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit); + else +#endif + best_sad = + fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv), + in_what->stride, second_pred) + + mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit); for (i = 0; i < search_range; ++i) { int best_site = -1; @@ -2224,9 +2499,17 @@ int av1_refining_search_8p_c(MACROBLOCK *x, int error_per_bit, int search_range, best_mv->col + neighbors[j].col }; if (is_mv_in(&x->mv_limits, &mv)) { - unsigned int sad = - fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), - in_what->stride, second_pred); + unsigned int sad; +#if CONFIG_EXT_INTER + if (mask) + sad = fn_ptr->msdf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), in_what->stride, + second_pred, mask, mask_stride, invert_mask); + else +#endif + sad = 
fn_ptr->sdaf(what->buf, what->stride, + get_buf_from_mv(in_what, &mv), in_what->stride, + second_pred); if (sad < best_sad) { sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); if (sad < best_sad) { @@ -2337,612 +2620,20 @@ int av1_full_pixel_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, return var; } -#if CONFIG_EXT_INTER -/* returns subpixel variance error function */ -#define DIST(r, c) \ - vfp->msvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, src_stride, \ - mask, mask_stride, &sse) - -/* checks if (r, c) has better score than previous best */ - -#define MVC(r, c) \ - (mvcost \ - ? ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + mvcost[0][((r)-rr)] + \ - mvcost[1][((c)-rc)]) * \ - error_per_bit + \ - 4096) >> \ - 13 \ - : 0) - -#define CHECK_BETTER(v, r, c) \ - if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ - thismse = (DIST(r, c)); \ - if ((v = MVC(r, c) + thismse) < besterr) { \ - besterr = v; \ - br = r; \ - bc = c; \ - *distortion = thismse; \ - *sse1 = sse; \ - } \ - } else { \ - v = INT_MAX; \ - } - -#undef CHECK_BETTER0 -#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c) - -#undef CHECK_BETTER1 -#define CHECK_BETTER1(v, r, c) \ - if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ - thismse = upsampled_masked_pref_error(xd, mask, mask_stride, vfp, z, \ - src_stride, upre(y, y_stride, r, c), \ - y_stride, w, h, &sse); \ - if ((v = MVC(r, c) + thismse) < besterr) { \ - besterr = v; \ - br = r; \ - bc = c; \ - *distortion = thismse; \ - *sse1 = sse; \ - } \ - } else { \ - v = INT_MAX; \ - } - -int av1_find_best_masked_sub_pixel_tree( - const MACROBLOCK *x, const uint8_t *mask, int mask_stride, MV *bestmv, - const MV *ref_mv, int allow_hp, int error_per_bit, - const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, - int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1, - int is_second) { - const uint8_t *const z = x->plane[0].src.buf; - const int src_stride = x->plane[0].src.stride; - const MACROBLOCKD *xd = &x->e_mbd; - unsigned int besterr = INT_MAX; - unsigned int sse; - int thismse; - unsigned int whichdir; - unsigned int halfiters = iters_per_step; - unsigned int quarteriters = iters_per_step; - unsigned int eighthiters = iters_per_step; - - const int y_stride = xd->plane[0].pre[is_second].stride; - const int offset = bestmv->row * y_stride + bestmv->col; - const uint8_t *const y = xd->plane[0].pre[is_second].buf; - - int rr = ref_mv->row; - int rc = ref_mv->col; - int br = bestmv->row * 8; - int bc = bestmv->col * 8; - int hstep = 4; - int tr = br; - int tc = bc; - int minc, maxc, minr, maxr; - - av1_set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr, - ref_mv); - - // central mv - bestmv->row *= 8; - bestmv->col *= 8; - - // calculate central point error - besterr = - vfp->mvf(y + offset, y_stride, z, src_stride, mask, mask_stride, sse1); - *distortion = besterr; - besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); - - // 1/2 pel - FIRST_LEVEL_CHECKS; - if (halfiters > 1) { - SECOND_LEVEL_CHECKS; - } - tr = br; - tc = bc; - - // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only - if (forced_stop != 2) { - hstep >>= 1; - FIRST_LEVEL_CHECKS; - if (quarteriters > 1) { - SECOND_LEVEL_CHECKS; - } - tr = br; - tc = bc; - } - - if (allow_hp && forced_stop == 0) { - hstep >>= 1; - FIRST_LEVEL_CHECKS; - if (eighthiters > 1) { - SECOND_LEVEL_CHECKS; - } - tr = br; - tc = bc; - } - // These lines insure static analysis doesn't warn that - // tr and tc aren't used after 
the above point. - (void)tr; - (void)tc; - - bestmv->row = br; - bestmv->col = bc; - - return besterr; -} - -static unsigned int setup_masked_center_error( - const uint8_t *mask, int mask_stride, const MV *bestmv, const MV *ref_mv, - int error_per_bit, const aom_variance_fn_ptr_t *vfp, - const uint8_t *const src, const int src_stride, const uint8_t *const y, - int y_stride, int offset, int *mvjcost, int *mvcost[2], unsigned int *sse1, - int *distortion) { - unsigned int besterr; - besterr = - vfp->mvf(y + offset, y_stride, src, src_stride, mask, mask_stride, sse1); - *distortion = besterr; - besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); - return besterr; -} - -static int upsampled_masked_pref_error(const MACROBLOCKD *xd, - const uint8_t *mask, int mask_stride, - const aom_variance_fn_ptr_t *vfp, - const uint8_t *const src, - const int src_stride, - const uint8_t *const y, int y_stride, - int w, int h, unsigned int *sse) { - unsigned int besterr; -#if CONFIG_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]); - aom_highbd_upsampled_pred(pred16, w, h, y, y_stride); - - besterr = vfp->mvf(CONVERT_TO_BYTEPTR(pred16), w, src, src_stride, mask, - mask_stride, sse); - } else { - DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]); -#else - DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]); - (void)xd; -#endif // CONFIG_HIGHBITDEPTH - aom_upsampled_pred(pred, w, h, y, y_stride); - - besterr = vfp->mvf(pred, w, src, src_stride, mask, mask_stride, sse); -#if CONFIG_HIGHBITDEPTH - } -#endif - return besterr; -} - -static unsigned int upsampled_setup_masked_center_error( - const MACROBLOCKD *xd, const uint8_t *mask, int mask_stride, - const MV *bestmv, const MV *ref_mv, int error_per_bit, - const aom_variance_fn_ptr_t *vfp, const uint8_t *const src, - const int src_stride, const uint8_t *const y, int y_stride, int w, int h, - int offset, int *mvjcost, int *mvcost[2], unsigned int *sse1, - int *distortion) { - unsigned int besterr = - upsampled_masked_pref_error(xd, mask, mask_stride, vfp, src, src_stride, - y + offset, y_stride, w, h, sse1); - *distortion = besterr; - besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); - return besterr; -} - -int av1_find_best_masked_sub_pixel_tree_up( - const AV1_COMP *cpi, MACROBLOCK *x, const uint8_t *mask, int mask_stride, - int mi_row, int mi_col, MV *bestmv, const MV *ref_mv, int allow_hp, - int error_per_bit, const aom_variance_fn_ptr_t *vfp, int forced_stop, - int iters_per_step, int *mvjcost, int *mvcost[2], int *distortion, - unsigned int *sse1, int is_second, int use_upsampled_ref) { - const uint8_t *const z = x->plane[0].src.buf; - const uint8_t *const src_address = z; - const int src_stride = x->plane[0].src.stride; - MACROBLOCKD *xd = &x->e_mbd; - struct macroblockd_plane *const pd = &xd->plane[0]; - MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; - unsigned int besterr = INT_MAX; - unsigned int sse; - unsigned int thismse; - - int rr = ref_mv->row; - int rc = ref_mv->col; - int br = bestmv->row * 8; - int bc = bestmv->col * 8; - int hstep = 4; - int iter; - int round = 3 - forced_stop; - int tr = br; - int tc = bc; - const MV *search_step = search_step_table; - int idx, best_idx = -1; - unsigned int cost_array[5]; - int kr, kc; - const int w = block_size_wide[mbmi->sb_type]; - const int h = block_size_high[mbmi->sb_type]; - int offset; - int y_stride; - const uint8_t *y; - - const struct buf_2d backup_pred = pd->pre[is_second]; - int minc, maxc, 
minr, maxr; - - av1_set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr, - ref_mv); - - if (use_upsampled_ref) { - int ref = xd->mi[0]->mbmi.ref_frame[is_second]; - const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref); - setup_pred_plane(&pd->pre[is_second], mbmi->sb_type, - upsampled_ref->y_buffer, upsampled_ref->y_crop_width, - upsampled_ref->y_crop_height, upsampled_ref->y_stride, - (mi_row << 3), (mi_col << 3), NULL, pd->subsampling_x, - pd->subsampling_y); - } - y = pd->pre[is_second].buf; - y_stride = pd->pre[is_second].stride; - offset = bestmv->row * y_stride + bestmv->col; - - if (!allow_hp) - if (round == 3) round = 2; - - bestmv->row *= 8; - bestmv->col *= 8; - - // use_upsampled_ref can be 0 or 1 - if (use_upsampled_ref) - besterr = upsampled_setup_masked_center_error( - xd, mask, mask_stride, bestmv, ref_mv, error_per_bit, vfp, z, - src_stride, y, y_stride, w, h, (offset * 8), mvjcost, mvcost, sse1, - distortion); - else - besterr = setup_masked_center_error( - mask, mask_stride, bestmv, ref_mv, error_per_bit, vfp, z, src_stride, y, - y_stride, offset, mvjcost, mvcost, sse1, distortion); - - for (iter = 0; iter < round; ++iter) { - // Check vertical and horizontal sub-pixel positions. - for (idx = 0; idx < 4; ++idx) { - tr = br + search_step[idx].row; - tc = bc + search_step[idx].col; - if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { - MV this_mv = { tr, tc }; - - if (use_upsampled_ref) { - const uint8_t *const pre_address = y + tr * y_stride + tc; - - thismse = upsampled_masked_pref_error( - xd, mask, mask_stride, vfp, src_address, src_stride, pre_address, - y_stride, w, h, &sse); - } else { - const uint8_t *const pre_address = - y + (tr >> 3) * y_stride + (tc >> 3); - thismse = vfp->msvf(pre_address, y_stride, sp(tc), sp(tr), - src_address, src_stride, mask, mask_stride, &sse); - } - - cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, - mvcost, error_per_bit); - - if (cost_array[idx] < besterr) { - best_idx = idx; - besterr = cost_array[idx]; - *distortion = thismse; - *sse1 = sse; - } - } else { - cost_array[idx] = INT_MAX; - } - } - - // Check diagonal sub-pixel position - kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep); - kr = (cost_array[2] <= cost_array[3] ? 
-hstep : hstep); - - tc = bc + kc; - tr = br + kr; - if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { - MV this_mv = { tr, tc }; - - if (use_upsampled_ref) { - const uint8_t *const pre_address = y + tr * y_stride + tc; - - thismse = upsampled_masked_pref_error( - xd, mask, mask_stride, vfp, src_address, src_stride, pre_address, - y_stride, w, h, &sse); - } else { - const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); - - thismse = vfp->msvf(pre_address, y_stride, sp(tc), sp(tr), src_address, - src_stride, mask, mask_stride, &sse); - } - - cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (cost_array[4] < besterr) { - best_idx = 4; - besterr = cost_array[4]; - *distortion = thismse; - *sse1 = sse; - } - } else { - cost_array[idx] = INT_MAX; - } - - if (best_idx < 4 && best_idx >= 0) { - br += search_step[best_idx].row; - bc += search_step[best_idx].col; - } else if (best_idx == 4) { - br = tr; - bc = tc; - } - - if (iters_per_step > 1 && best_idx != -1) { - if (use_upsampled_ref) { - SECOND_LEVEL_CHECKS_BEST(1); - } else { - SECOND_LEVEL_CHECKS_BEST(0); - } - } - - tr = br; - tc = bc; - - search_step += 4; - hstep >>= 1; - best_idx = -1; - } - - // These lines insure static analysis doesn't warn that - // tr and tc aren't used after the above point. - (void)tr; - (void)tc; - - bestmv->row = br; - bestmv->col = bc; - - if (use_upsampled_ref) { - pd->pre[is_second] = backup_pred; - } - - return besterr; -} - -#undef DIST -#undef MVC -#undef CHECK_BETTER - -static int get_masked_mvpred_var(const MACROBLOCK *x, const uint8_t *mask, - int mask_stride, const MV *best_mv, - const MV *center_mv, - const aom_variance_fn_ptr_t *vfp, - int use_mvcost, int is_second) { - const MACROBLOCKD *const xd = &x->e_mbd; - const struct buf_2d *const what = &x->plane[0].src; - const struct buf_2d *const in_what = &xd->plane[0].pre[is_second]; - const MV mv = { best_mv->row * 8, best_mv->col * 8 }; - unsigned int unused; - - return vfp->mvf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv), - in_what->stride, mask, mask_stride, &unused) + - (use_mvcost ? 
mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost, - x->errorperbit) - : 0); -} - -int masked_refining_search_sad(const MACROBLOCK *x, const uint8_t *mask, - int mask_stride, MV *ref_mv, int error_per_bit, - int search_range, - const aom_variance_fn_ptr_t *fn_ptr, - const MV *center_mv, int is_second) { - const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } }; - const MACROBLOCKD *const xd = &x->e_mbd; - const struct buf_2d *const what = &x->plane[0].src; - const struct buf_2d *const in_what = &xd->plane[0].pre[is_second]; - const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; - unsigned int best_sad = - fn_ptr->msdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), - in_what->stride, mask, mask_stride) + - mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); - int i, j; - - for (i = 0; i < search_range; i++) { - int best_site = -1; - - for (j = 0; j < 4; j++) { - const MV mv = { ref_mv->row + neighbors[j].row, - ref_mv->col + neighbors[j].col }; - if (is_mv_in(&x->mv_limits, &mv)) { - unsigned int sad = - fn_ptr->msdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), - in_what->stride, mask, mask_stride); - if (sad < best_sad) { - sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); - if (sad < best_sad) { - best_sad = sad; - best_site = j; - } - } - } - } - - if (best_site == -1) { - break; - } else { - ref_mv->row += neighbors[best_site].row; - ref_mv->col += neighbors[best_site].col; - } - } - return best_sad; -} - -int masked_diamond_search_sad(const MACROBLOCK *x, - const search_site_config *cfg, - const uint8_t *mask, int mask_stride, MV *ref_mv, - MV *best_mv, int search_param, int sad_per_bit, - int *num00, const aom_variance_fn_ptr_t *fn_ptr, - const MV *center_mv, int is_second) { - const MACROBLOCKD *const xd = &x->e_mbd; - const struct buf_2d *const what = &x->plane[0].src; - const struct buf_2d *const in_what = &xd->plane[0].pre[is_second]; - // search_param determines the length of the initial step and hence the number - // of iterations - // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = - // (MAX_FIRST_STEP/4) pel... etc. 
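The comment above ties search_param to the starting radius of the diamond pattern: each increment halves the first step. A minimal standalone sketch of that mapping follows; MAX_FIRST_STEP is an assumed value chosen only for illustration, and the real step offsets come from the precomputed search_site_config tables rather than a helper like this.

#include <stdio.h>

#define MAX_FIRST_STEP 32 /* assumed for the sketch, not libaom's constant */

/* Initial diamond radius in full pels implied by a given search_param. */
static int first_step_for_param(int search_param) {
  return MAX_FIRST_STEP >> search_param;
}

int main(void) {
  for (int sp = 0; sp <= 4; ++sp)
    printf("search_param %d -> first step %d pels\n", sp,
           first_step_for_param(sp));
  return 0;
}

A larger search_param converges faster but explores a smaller neighbourhood, which is why the further refinement passes in these drivers re-run the search with step_param + n.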
- const search_site *const ss = &cfg->ss[search_param * cfg->searches_per_step]; - const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param; - const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; - const uint8_t *best_address, *in_what_ref; - int best_sad = INT_MAX; - int best_site = 0; - int last_site = 0; - int i, j, step; - - clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max, - x->mv_limits.row_min, x->mv_limits.row_max); - in_what_ref = get_buf_from_mv(in_what, ref_mv); - best_address = in_what_ref; - *num00 = 0; - *best_mv = *ref_mv; - - // Check the starting position - best_sad = fn_ptr->msdf(what->buf, what->stride, best_address, - in_what->stride, mask, mask_stride) + - mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); - - i = 1; - - for (step = 0; step < tot_steps; step++) { - for (j = 0; j < cfg->searches_per_step; j++) { - const MV mv = { best_mv->row + ss[i].mv.row, - best_mv->col + ss[i].mv.col }; - if (is_mv_in(&x->mv_limits, &mv)) { - int sad = - fn_ptr->msdf(what->buf, what->stride, best_address + ss[i].offset, - in_what->stride, mask, mask_stride); - if (sad < best_sad) { - sad += mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); - if (sad < best_sad) { - best_sad = sad; - best_site = i; - } - } - } - - i++; - } - - if (best_site != last_site) { - best_mv->row += ss[best_site].mv.row; - best_mv->col += ss[best_site].mv.col; - best_address += ss[best_site].offset; - last_site = best_site; -#if defined(NEW_DIAMOND_SEARCH) - while (1) { - const MV this_mv = { best_mv->row + ss[best_site].mv.row, - best_mv->col + ss[best_site].mv.col }; - if (is_mv_in(&x->mv_limits, &this_mv)) { - int sad = fn_ptr->msdf(what->buf, what->stride, - best_address + ss[best_site].offset, - in_what->stride, mask, mask_stride); - if (sad < best_sad) { - sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); - if (sad < best_sad) { - best_sad = sad; - best_mv->row += ss[best_site].mv.row; - best_mv->col += ss[best_site].mv.col; - best_address += ss[best_site].offset; - continue; - } - } - } - break; - } -#endif - } else if (best_address == in_what_ref) { - (*num00)++; - } - } - return best_sad; -} - -int av1_masked_full_pixel_diamond(const AV1_COMP *cpi, MACROBLOCK *x, - const uint8_t *mask, int mask_stride, - MV *mvp_full, int step_param, int sadpb, - int further_steps, int do_refine, - const aom_variance_fn_ptr_t *fn_ptr, - const MV *ref_mv, MV *dst_mv, int is_second) { - MV temp_mv; - int thissme, n, num00 = 0; - int bestsme = masked_diamond_search_sad(x, &cpi->ss_cfg, mask, mask_stride, - mvp_full, &temp_mv, step_param, sadpb, - &n, fn_ptr, ref_mv, is_second); - if (bestsme < INT_MAX) - bestsme = get_masked_mvpred_var(x, mask, mask_stride, &temp_mv, ref_mv, - fn_ptr, 1, is_second); - *dst_mv = temp_mv; - - // If there won't be more n-step search, check to see if refining search is - // needed. - if (n > further_steps) do_refine = 0; - - while (n < further_steps) { - ++n; - - if (num00) { - num00--; - } else { - thissme = masked_diamond_search_sad( - x, &cpi->ss_cfg, mask, mask_stride, mvp_full, &temp_mv, - step_param + n, sadpb, &num00, fn_ptr, ref_mv, is_second); - if (thissme < INT_MAX) - thissme = get_masked_mvpred_var(x, mask, mask_stride, &temp_mv, ref_mv, - fn_ptr, 1, is_second); - - // check to see if refining search is needed. 
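Both the refining and diamond loops above use the same two-stage comparison: the raw (masked) SAD is tested first, and the motion-vector rate term is only added for candidates that can still beat the current best. A minimal sketch of that pattern, with hypothetical names:

/* Two-stage SAD test: cheap reject on raw SAD, full cost only for survivors. */
static int maybe_update_best(unsigned int raw_sad, unsigned int mv_rate_cost,
                             unsigned int *best_sad) {
  if (raw_sad >= *best_sad) return 0; /* reject without pricing the MV */
  raw_sad += mv_rate_cost;            /* add the rate term for contenders */
  if (raw_sad >= *best_sad) return 0;
  *best_sad = raw_sad;
  return 1;                           /* caller records this as the new best */
}

Skipping the mvsad_err_cost() lookup for candidates that already lose on raw SAD keeps the inner search loops cheap.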
- if (num00 > further_steps - n) do_refine = 0; - - if (thissme < bestsme) { - bestsme = thissme; - *dst_mv = temp_mv; - } - } - } - - // final 1-away diamond refining search - if (do_refine) { - const int search_range = 8; - MV best_mv = *dst_mv; - thissme = - masked_refining_search_sad(x, mask, mask_stride, &best_mv, sadpb, - search_range, fn_ptr, ref_mv, is_second); - if (thissme < INT_MAX) - thissme = get_masked_mvpred_var(x, mask, mask_stride, &best_mv, ref_mv, - fn_ptr, 1, is_second); - if (thissme < bestsme) { - bestsme = thissme; - *dst_mv = best_mv; - } - } - return bestsme; -} -#endif // CONFIG_EXT_INTER - #if CONFIG_MOTION_VAR /* returns subpixel variance error function */ #define DIST(r, c) \ vfp->osvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, mask, &sse) /* checks if (r, c) has better score than previous best */ -#define MVC(r, c) \ - (mvcost \ - ? ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + mvcost[0][((r)-rr)] + \ - mvcost[1][((c)-rc)]) * \ - error_per_bit + \ - 4096) >> \ - 13 \ - : 0) +#define MVC(r, c) \ + (unsigned int)(mvcost \ + ? ((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \ + mvcost[0][((r)-rr)] + (int64_t)mvcost[1][((c)-rc)]) * \ + error_per_bit + \ + 4096) >> \ + 13 \ + : 0) #define CHECK_BETTER(v, r, c) \ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ @@ -3452,15 +3143,21 @@ int av1_obmc_full_pixel_diamond(const AV1_COMP *cpi, MACROBLOCK *x, (void)thismse; \ (void)cost_list; // Return the maximum MV. -int av1_return_max_sub_pixel_mv(MACROBLOCK *x, const MV *ref_mv, int allow_hp, - int error_per_bit, - const aom_variance_fn_ptr_t *vfp, - int forced_stop, int iters_per_step, - int *cost_list, int *mvjcost, int *mvcost[2], - int *distortion, unsigned int *sse1, - const uint8_t *second_pred, int w, int h, - int use_upsampled_ref) { +int av1_return_max_sub_pixel_mv( + MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit, + const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, + int *cost_list, int *mvjcost, int *mvcost[2], int *distortion, + unsigned int *sse1, const uint8_t *second_pred, +#if CONFIG_EXT_INTER + const uint8_t *mask, int mask_stride, int invert_mask, +#endif + int w, int h, int use_upsampled_ref) { COMMON_MV_TEST; +#if CONFIG_EXT_INTER + (void)mask; + (void)mask_stride; + (void)invert_mask; +#endif (void)minr; (void)minc; bestmv->row = maxr; @@ -3472,17 +3169,23 @@ int av1_return_max_sub_pixel_mv(MACROBLOCK *x, const MV *ref_mv, int allow_hp, return besterr; } // Return the minimum MV. 
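The reworked MVC macro above widens the rate-times-error_per_bit product before the ((... + 4096) >> 13) step; 4096 is half of 1 << 13, so the shift performs a round-to-nearest conversion of the MV rate into SAD/variance units. A standalone version of the same arithmetic, with illustrative inputs rather than real encoder state:

#include <stdint.h>

/* Round-to-nearest conversion of an MV rate (joint + row + col table costs)
   into the distortion domain, mirroring ((rate * error_per_bit + 4096) >> 13). */
static unsigned int mv_rate_to_sad_units(int joint_cost, int row_cost,
                                         int col_cost, int error_per_bit) {
  const int64_t rate = (int64_t)joint_cost + row_cost + col_cost;
  /* The 64-bit product is the point of the patched macro: with 32-bit
     arithmetic a large rate times a large error_per_bit can overflow. */
  return (unsigned int)((rate * error_per_bit + 4096) >> 13);
}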
-int av1_return_min_sub_pixel_mv(MACROBLOCK *x, const MV *ref_mv, int allow_hp, - int error_per_bit, - const aom_variance_fn_ptr_t *vfp, - int forced_stop, int iters_per_step, - int *cost_list, int *mvjcost, int *mvcost[2], - int *distortion, unsigned int *sse1, - const uint8_t *second_pred, int w, int h, - int use_upsampled_ref) { +int av1_return_min_sub_pixel_mv( + MACROBLOCK *x, const MV *ref_mv, int allow_hp, int error_per_bit, + const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, + int *cost_list, int *mvjcost, int *mvcost[2], int *distortion, + unsigned int *sse1, const uint8_t *second_pred, +#if CONFIG_EXT_INTER + const uint8_t *mask, int mask_stride, int invert_mask, +#endif + int w, int h, int use_upsampled_ref) { COMMON_MV_TEST; (void)maxr; (void)maxc; +#if CONFIG_EXT_INTER + (void)mask; + (void)mask_stride; + (void)invert_mask; +#endif bestmv->row = minr; bestmv->col = minc; besterr = 0; diff --git a/third_party/aom/av1/encoder/mcomp.h b/third_party/aom/av1/encoder/mcomp.h index 8465860ad..7e8b4b29d 100644 --- a/third_party/aom/av1/encoder/mcomp.h +++ b/third_party/aom/av1/encoder/mcomp.h @@ -58,6 +58,13 @@ int av1_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv, int av1_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv, const MV *center_mv, const uint8_t *second_pred, const aom_variance_fn_ptr_t *vfp, int use_mvcost); +#if CONFIG_EXT_INTER +int av1_get_mvpred_mask_var(const MACROBLOCK *x, const MV *best_mv, + const MV *center_mv, const uint8_t *second_pred, + const uint8_t *mask, int mask_stride, + int invert_mask, const aom_variance_fn_ptr_t *vfp, + int use_mvcost); +#endif struct AV1_COMP; struct SPEED_FEATURES; @@ -91,8 +98,11 @@ typedef int(fractional_mv_step_fp)( const aom_variance_fn_ptr_t *vfp, int forced_stop, // 0 - full, 1 - qtr only, 2 - half only int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], - int *distortion, unsigned int *sse1, const uint8_t *second_pred, int w, - int h, int use_upsampled_ref); + int *distortion, unsigned int *sse1, const uint8_t *second_pred, +#if CONFIG_EXT_INTER + const uint8_t *mask, int mask_stride, int invert_mask, +#endif + int w, int h, int use_upsampled_ref); extern fractional_mv_step_fp av1_find_best_sub_pixel_tree; extern fractional_mv_step_fp av1_find_best_sub_pixel_tree_pruned; @@ -113,6 +123,10 @@ typedef int (*av1_diamond_search_fn_t)( int av1_refining_search_8p_c(MACROBLOCK *x, int error_per_bit, int search_range, const aom_variance_fn_ptr_t *fn_ptr, +#if CONFIG_EXT_INTER + const uint8_t *mask, int mask_stride, + int invert_mask, +#endif const MV *center_mv, const uint8_t *second_pred); struct AV1_COMP; @@ -122,27 +136,6 @@ int av1_full_pixel_search(const struct AV1_COMP *cpi, MACROBLOCK *x, int error_per_bit, int *cost_list, const MV *ref_mv, int var_max, int rd); -#if CONFIG_EXT_INTER -int av1_find_best_masked_sub_pixel_tree( - const MACROBLOCK *x, const uint8_t *mask, int mask_stride, MV *bestmv, - const MV *ref_mv, int allow_hp, int error_per_bit, - const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, - int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1, - int is_second); -int av1_find_best_masked_sub_pixel_tree_up( - const struct AV1_COMP *cpi, MACROBLOCK *x, const uint8_t *mask, - int mask_stride, int mi_row, int mi_col, MV *bestmv, const MV *ref_mv, - int allow_hp, int error_per_bit, const aom_variance_fn_ptr_t *vfp, - int forced_stop, int iters_per_step, int *mvjcost, int *mvcost[2], - int *distortion, unsigned int *sse1, int is_second, int 
use_upsampled_ref); -int av1_masked_full_pixel_diamond(const struct AV1_COMP *cpi, MACROBLOCK *x, - const uint8_t *mask, int mask_stride, - MV *mvp_full, int step_param, int sadpb, - int further_steps, int do_refine, - const aom_variance_fn_ptr_t *fn_ptr, - const MV *ref_mv, MV *dst_mv, int is_second); -#endif // CONFIG_EXT_INTER - #if CONFIG_MOTION_VAR int av1_obmc_full_pixel_diamond(const struct AV1_COMP *cpi, MACROBLOCK *x, MV *mvp_full, int step_param, int sadpb, @@ -160,4 +153,14 @@ int av1_find_best_obmc_sub_pixel_tree_up( } // extern "C" #endif +#if CONFIG_WARPED_MOTION +unsigned int av1_compute_motion_cost(const struct AV1_COMP *cpi, + MACROBLOCK *const x, BLOCK_SIZE bsize, + int mi_row, int mi_col, const MV *this_mv); +unsigned int av1_refine_warped_mv(const struct AV1_COMP *cpi, + MACROBLOCK *const x, BLOCK_SIZE bsize, + int mi_row, int mi_col, int *pts, + int *pts_inref); +#endif // CONFIG_WARPED_MOTION + #endif // AV1_ENCODER_MCOMP_H_ diff --git a/third_party/aom/av1/encoder/palette.c b/third_party/aom/av1/encoder/palette.c index 355141de5..235964dde 100644 --- a/third_party/aom/av1/encoder/palette.c +++ b/third_party/aom/av1/encoder/palette.c @@ -167,31 +167,58 @@ int av1_count_colors(const uint8_t *src, int stride, int rows, int cols) { } #if CONFIG_PALETTE_DELTA_ENCODING -int av1_get_palette_delta_bits_y(const PALETTE_MODE_INFO *const pmi, - int bit_depth, int *min_bits) { - const int n = pmi->palette_size[0]; - int max_d = 0, i; - *min_bits = bit_depth - 3; - for (i = 1; i < n; ++i) { - const int delta = pmi->palette_colors[i] - pmi->palette_colors[i - 1]; - assert(delta > 0); - if (delta > max_d) max_d = delta; +static int delta_encode_cost(const int *colors, int num, int bit_depth, + int min_val) { + if (num <= 0) return 0; + int bits_cost = bit_depth; + if (num == 1) return bits_cost; + bits_cost += 2; + int max_delta = 0; + int deltas[PALETTE_MAX_SIZE]; + const int min_bits = bit_depth - 3; + for (int i = 1; i < num; ++i) { + const int delta = colors[i] - colors[i - 1]; + deltas[i - 1] = delta; + assert(delta >= min_val); + if (delta > max_delta) max_delta = delta; + } + int bits_per_delta = AOMMAX(av1_ceil_log2(max_delta + 1 - min_val), min_bits); + assert(bits_per_delta <= bit_depth); + int range = (1 << bit_depth) - colors[0] - min_val; + for (int i = 0; i < num - 1; ++i) { + bits_cost += bits_per_delta; + range -= deltas[i]; + bits_per_delta = AOMMIN(bits_per_delta, av1_ceil_log2(range)); } - return AOMMAX(av1_ceil_log2(max_d), *min_bits); + return bits_cost; } -int av1_get_palette_delta_bits_u(const PALETTE_MODE_INFO *const pmi, - int bit_depth, int *min_bits) { - const int n = pmi->palette_size[1]; - int max_d = 0, i; - *min_bits = bit_depth - 3; - for (i = 1; i < n; ++i) { - const int delta = pmi->palette_colors[PALETTE_MAX_SIZE + i] - - pmi->palette_colors[PALETTE_MAX_SIZE + i - 1]; - assert(delta >= 0); - if (delta > max_d) max_d = delta; +int av1_index_color_cache(const uint16_t *color_cache, int n_cache, + const uint16_t *colors, int n_colors, + uint8_t *cache_color_found, int *out_cache_colors) { + if (n_cache <= 0) { + for (int i = 0; i < n_colors; ++i) out_cache_colors[i] = colors[i]; + return n_colors; } - return AOMMAX(av1_ceil_log2(max_d + 1), *min_bits); + memset(cache_color_found, 0, n_cache * sizeof(*cache_color_found)); + int n_in_cache = 0; + int in_cache_flags[PALETTE_MAX_SIZE]; + memset(in_cache_flags, 0, sizeof(in_cache_flags)); + for (int i = 0; i < n_cache && n_in_cache < n_colors; ++i) { + for (int j = 0; j < n_colors; ++j) { + if (colors[j] 
== color_cache[i]) { + in_cache_flags[j] = 1; + cache_color_found[i] = 1; + ++n_in_cache; + break; + } + } + } + int j = 0; + for (int i = 0; i < n_colors; ++i) + if (!in_cache_flags[i]) out_cache_colors[j++] = colors[i]; + assert(j == n_colors - n_in_cache); + return j; } int av1_get_palette_delta_bits_v(const PALETTE_MODE_INFO *const pmi, @@ -199,10 +226,10 @@ int av1_get_palette_delta_bits_v(const PALETTE_MODE_INFO *const pmi, int *min_bits) { const int n = pmi->palette_size[1]; const int max_val = 1 << bit_depth; - int max_d = 0, i; + int max_d = 0; *min_bits = bit_depth - 4; *zero_count = 0; - for (i = 1; i < n; ++i) { + for (int i = 1; i < n; ++i) { const int delta = pmi->palette_colors[2 * PALETTE_MAX_SIZE + i] - pmi->palette_colors[2 * PALETTE_MAX_SIZE + i - 1]; const int v = abs(delta); @@ -215,26 +242,42 @@ int av1_get_palette_delta_bits_v(const PALETTE_MODE_INFO *const pmi, #endif // CONFIG_PALETTE_DELTA_ENCODING int av1_palette_color_cost_y(const PALETTE_MODE_INFO *const pmi, +#if CONFIG_PALETTE_DELTA_ENCODING + uint16_t *color_cache, int n_cache, +#endif // CONFIG_PALETTE_DELTA_ENCODING int bit_depth) { const int n = pmi->palette_size[0]; #if CONFIG_PALETTE_DELTA_ENCODING - int min_bits = 0; - const int bits = av1_get_palette_delta_bits_y(pmi, bit_depth, &min_bits); - return av1_cost_bit(128, 0) * (2 + bit_depth + bits * (n - 1)); + int out_cache_colors[PALETTE_MAX_SIZE]; + uint8_t cache_color_found[2 * PALETTE_MAX_SIZE]; + const int n_out_cache = + av1_index_color_cache(color_cache, n_cache, pmi->palette_colors, n, + cache_color_found, out_cache_colors); + const int total_bits = + n_cache + delta_encode_cost(out_cache_colors, n_out_cache, bit_depth, 1); + return total_bits * av1_cost_bit(128, 0); #else return bit_depth * n * av1_cost_bit(128, 0); #endif // CONFIG_PALETTE_DELTA_ENCODING } int av1_palette_color_cost_uv(const PALETTE_MODE_INFO *const pmi, +#if CONFIG_PALETTE_DELTA_ENCODING + uint16_t *color_cache, int n_cache, +#endif // CONFIG_PALETTE_DELTA_ENCODING int bit_depth) { const int n = pmi->palette_size[1]; #if CONFIG_PALETTE_DELTA_ENCODING - int cost = 0; + int total_bits = 0; // U channel palette color cost. - int min_bits_u = 0; - const int bits_u = av1_get_palette_delta_bits_u(pmi, bit_depth, &min_bits_u); - cost += av1_cost_bit(128, 0) * (2 + bit_depth + bits_u * (n - 1)); + int out_cache_colors[PALETTE_MAX_SIZE]; + uint8_t cache_color_found[2 * PALETTE_MAX_SIZE]; + const int n_out_cache = av1_index_color_cache( + color_cache, n_cache, pmi->palette_colors + PALETTE_MAX_SIZE, n, + cache_color_found, out_cache_colors); + total_bits += + n_cache + delta_encode_cost(out_cache_colors, n_out_cache, bit_depth, 0); + // V channel palette color cost. 
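With the color cache, av1_palette_color_cost_y above prices a palette as n_cache flag bits (one per cached color, saying whether it is reused) plus delta coding of whatever is left, which is what n_cache + delta_encode_cost(...) adds up before scaling by av1_cost_bit(128, 0). A toy version of the cache-partitioning step, simplified and not the library routine:

#include <stdint.h>

/* Toy cache partitioning: mark which palette colors already sit in the cache
   and collect the rest for delta coding. Returns the out-of-cache count.
   Array sizes and names are illustrative only. */
static int partition_against_cache(const uint16_t *cache, int n_cache,
                                   const uint16_t *palette, int n_colors,
                                   uint16_t *out_of_cache) {
  int found[64] = { 0 }; /* assumes n_colors <= 64 for the sketch */
  for (int i = 0; i < n_cache; ++i) {
    for (int j = 0; j < n_colors; ++j) {
      if (!found[j] && palette[j] == cache[i]) {
        found[j] = 1;
        break; /* one cache entry accounts for at most one palette color */
      }
    }
  }
  int n_out = 0;
  for (int j = 0; j < n_colors; ++j)
    if (!found[j]) out_of_cache[n_out++] = palette[j];
  return n_out;
}

For chroma the same split is applied to the U colors, while the V colors are priced either with signed deltas or as raw values, whichever of bits_using_delta and bits_using_raw is smaller.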
int zero_count = 0, min_bits_v = 0; const int bits_v = @@ -242,8 +285,8 @@ int av1_palette_color_cost_uv(const PALETTE_MODE_INFO *const pmi, const int bits_using_delta = 2 + bit_depth + (bits_v + 1) * (n - 1) - zero_count; const int bits_using_raw = bit_depth * n; - cost += av1_cost_bit(128, 0) * (1 + AOMMIN(bits_using_delta, bits_using_raw)); - return cost; + total_bits += 1 + AOMMIN(bits_using_delta, bits_using_raw); + return total_bits * av1_cost_bit(128, 0); #else return 2 * bit_depth * n * av1_cost_bit(128, 0); #endif // CONFIG_PALETTE_DELTA_ENCODING diff --git a/third_party/aom/av1/encoder/palette.h b/third_party/aom/av1/encoder/palette.h index 5403ac5e6..f5a3c1bdd 100644 --- a/third_party/aom/av1/encoder/palette.h +++ b/third_party/aom/av1/encoder/palette.h @@ -45,13 +45,12 @@ int av1_count_colors_highbd(const uint8_t *src8, int stride, int rows, int cols, #endif // CONFIG_HIGHBITDEPTH #if CONFIG_PALETTE_DELTA_ENCODING -// Return the number of bits used to transmit each luma palette color delta. -int av1_get_palette_delta_bits_y(const PALETTE_MODE_INFO *const pmi, - int bit_depth, int *min_bits); - -// Return the number of bits used to transmit each U palette color delta. -int av1_get_palette_delta_bits_u(const PALETTE_MODE_INFO *const pmi, - int bit_depth, int *min_bits); +// Given a color cache and a set of base colors, find if each cache color is +// present in the base colors, record the binary results in "cache_color_found". +// Record the colors that are not in the color cache in "out_cache_colors". +int av1_index_color_cache(const uint16_t *color_cache, int n_cache, + const uint16_t *colors, int n_colors, + uint8_t *cache_color_found, int *out_cache_colors); // Return the number of bits used to transmit each v palette color delta; // assign zero_count with the number of deltas being 0. @@ -60,10 +59,17 @@ int av1_get_palette_delta_bits_v(const PALETTE_MODE_INFO *const pmi, #endif // CONFIG_PALETTE_DELTA_ENCODING // Return the rate cost for transmitting luma palette color values. -int av1_palette_color_cost_y(const PALETTE_MODE_INFO *const pmi, int bit_depth); +int av1_palette_color_cost_y(const PALETTE_MODE_INFO *const pmi, +#if CONFIG_PALETTE_DELTA_ENCODING + uint16_t *color_cache, int n_cache, +#endif // CONFIG_PALETTE_DELTA_ENCODING + int bit_depth); // Return the rate cost for transmitting chroma palette color values. int av1_palette_color_cost_uv(const PALETTE_MODE_INFO *const pmi, +#if CONFIG_PALETTE_DELTA_ENCODING + uint16_t *color_cache, int n_cache, +#endif // CONFIG_PALETTE_DELTA_ENCODING int bit_depth); #ifdef __cplusplus diff --git a/third_party/aom/av1/encoder/pickrst.c b/third_party/aom/av1/encoder/pickrst.c index 21410e0af..4a446d24e 100644 --- a/third_party/aom/av1/encoder/pickrst.c +++ b/third_party/aom/av1/encoder/pickrst.c @@ -31,17 +31,18 @@ #include "av1/encoder/encoder.h" #include "av1/encoder/picklpf.h" #include "av1/encoder/pickrst.h" +#include "av1/encoder/mathutils.h" // When set to RESTORE_WIENER or RESTORE_SGRPROJ only those are allowed. // When set to RESTORE_NONE (0) we allow switchable. 
const RestorationType force_restore_type = RESTORE_NONE; // Number of Wiener iterations -#define NUM_WIENER_ITERS 10 +#define NUM_WIENER_ITERS 5 typedef double (*search_restore_type)(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, int partial_frame, - RestorationInfo *info, + int plane, RestorationInfo *info, RestorationType *rest_level, double *best_tile_cost, YV12_BUFFER_CONFIG *dst_frame); @@ -216,6 +217,62 @@ static int64_t get_pixel_proj_error(uint8_t *src8, int width, int height, return err; } +#define USE_SGRPROJ_REFINEMENT_SEARCH 1 +static int64_t finer_search_pixel_proj_error( + uint8_t *src8, int width, int height, int src_stride, uint8_t *dat8, + int dat_stride, int bit_depth, int32_t *flt1, int flt1_stride, + int32_t *flt2, int flt2_stride, int start_step, int *xqd) { + int64_t err = get_pixel_proj_error(src8, width, height, src_stride, dat8, + dat_stride, bit_depth, flt1, flt1_stride, + flt2, flt2_stride, xqd); + (void)start_step; +#if USE_SGRPROJ_REFINEMENT_SEARCH + int64_t err2; + int tap_min[] = { SGRPROJ_PRJ_MIN0, SGRPROJ_PRJ_MIN1 }; + int tap_max[] = { SGRPROJ_PRJ_MAX0, SGRPROJ_PRJ_MAX1 }; + for (int s = start_step; s >= 1; s >>= 1) { + for (int p = 0; p < 2; ++p) { + int skip = 0; + do { + if (xqd[p] - s >= tap_min[p]) { + xqd[p] -= s; + err2 = get_pixel_proj_error(src8, width, height, src_stride, dat8, + dat_stride, bit_depth, flt1, flt1_stride, + flt2, flt2_stride, xqd); + if (err2 > err) { + xqd[p] += s; + } else { + err = err2; + skip = 1; + // At the highest step size continue moving in the same direction + if (s == start_step) continue; + } + } + break; + } while (1); + if (skip) break; + do { + if (xqd[p] + s <= tap_max[p]) { + xqd[p] += s; + err2 = get_pixel_proj_error(src8, width, height, src_stride, dat8, + dat_stride, bit_depth, flt1, flt1_stride, + flt2, flt2_stride, xqd); + if (err2 > err) { + xqd[p] -= s; + } else { + err = err2; + // At the highest step size continue moving in the same direction + if (s == start_step) continue; + } + } + break; + } while (1); + } + } +#endif // USE_SGRPROJ_REFINEMENT_SEARCH + return err; +} + static void get_proj_subspace(uint8_t *src8, int width, int height, int src_stride, uint8_t *dat8, int dat_stride, int bit_depth, int32_t *flt1, int flt1_stride, @@ -329,12 +386,14 @@ static void search_selfguided_restoration(uint8_t *dat8, int width, int height, #if CONFIG_HIGHBITDEPTH } #endif + aom_clear_system_state(); get_proj_subspace(src8, width, height, src_stride, dat8, dat_stride, bit_depth, flt1, width, flt2, width, exq); + aom_clear_system_state(); encode_xq(exq, exqd); - err = - get_pixel_proj_error(src8, width, height, src_stride, dat8, dat_stride, - bit_depth, flt1, width, flt2, width, exqd); + err = finer_search_pixel_proj_error(src8, width, height, src_stride, dat8, + dat_stride, bit_depth, flt1, width, + flt2, width, 2, exqd); if (besterr == -1 || err < besterr) { bestep = ep; besterr = err; @@ -362,8 +421,9 @@ static int count_sgrproj_bits(SgrprojInfo *sgrproj_info, } static double search_sgrproj(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, - int partial_frame, RestorationInfo *info, - RestorationType *type, double *best_tile_cost, + int partial_frame, int plane, + RestorationInfo *info, RestorationType *type, + double *best_tile_cost, YV12_BUFFER_CONFIG *dst_frame) { SgrprojInfo *sgrproj_info = info->sgrproj_info; double err, cost_norestore, cost_sgrproj; @@ -374,44 +434,68 @@ static double search_sgrproj(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, RestorationInfo *rsi = &cpi->rst_search[0]; int tile_idx, 
tile_width, tile_height, nhtiles, nvtiles; int h_start, h_end, v_start, v_end; - // Allocate for the src buffer at high precision - const int ntiles = av1_get_rest_ntiles( - cm->width, cm->height, cm->rst_info[0].restoration_tilesize, &tile_width, - &tile_height, &nhtiles, &nvtiles); + int width, height, src_stride, dgd_stride; + uint8_t *dgd_buffer, *src_buffer; + if (plane == AOM_PLANE_Y) { + width = cm->width; + height = cm->height; + src_buffer = src->y_buffer; + src_stride = src->y_stride; + dgd_buffer = dgd->y_buffer; + dgd_stride = dgd->y_stride; + assert(width == dgd->y_crop_width); + assert(height == dgd->y_crop_height); + assert(width == src->y_crop_width); + assert(height == src->y_crop_height); + } else { + width = src->uv_crop_width; + height = src->uv_crop_height; + src_stride = src->uv_stride; + dgd_stride = dgd->uv_stride; + src_buffer = plane == AOM_PLANE_U ? src->u_buffer : src->v_buffer; + dgd_buffer = plane == AOM_PLANE_U ? dgd->u_buffer : dgd->v_buffer; + assert(width == dgd->uv_crop_width); + assert(height == dgd->uv_crop_height); + } + const int ntiles = + av1_get_rest_ntiles(width, height, cm->rst_info[0].restoration_tilesize, + &tile_width, &tile_height, &nhtiles, &nvtiles); SgrprojInfo ref_sgrproj_info; set_default_sgrproj(&ref_sgrproj_info); - rsi->frame_restoration_type = RESTORE_SGRPROJ; + rsi[plane].frame_restoration_type = RESTORE_SGRPROJ; for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) { - rsi->restoration_type[tile_idx] = RESTORE_NONE; + rsi[plane].restoration_type[tile_idx] = RESTORE_NONE; } // Compute best Sgrproj filters for each tile for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) { av1_get_rest_tile_limits(tile_idx, 0, 0, nhtiles, nvtiles, tile_width, - tile_height, cm->width, cm->height, 0, 0, &h_start, - &h_end, &v_start, &v_end); + tile_height, width, height, 0, 0, &h_start, &h_end, + &v_start, &v_end); err = sse_restoration_tile(src, cm->frame_to_show, cm, h_start, - h_end - h_start, v_start, v_end - v_start, 1); + h_end - h_start, v_start, v_end - v_start, + (1 << plane)); // #bits when a tile is not restored bits = av1_cost_bit(RESTORE_NONE_SGRPROJ_PROB, 0); cost_norestore = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err); best_tile_cost[tile_idx] = DBL_MAX; search_selfguided_restoration( - dgd->y_buffer + v_start * dgd->y_stride + h_start, h_end - h_start, - v_end - v_start, dgd->y_stride, - src->y_buffer + v_start * src->y_stride + h_start, src->y_stride, + dgd_buffer + v_start * dgd_stride + h_start, h_end - h_start, + v_end - v_start, dgd_stride, + src_buffer + v_start * src_stride + h_start, src_stride, #if CONFIG_HIGHBITDEPTH cm->bit_depth, #else 8, #endif // CONFIG_HIGHBITDEPTH - &rsi->sgrproj_info[tile_idx].ep, rsi->sgrproj_info[tile_idx].xqd, - cm->rst_internal.tmpbuf); - rsi->restoration_type[tile_idx] = RESTORE_SGRPROJ; - err = try_restoration_tile(src, cpi, rsi, 1, partial_frame, tile_idx, 0, 0, - dst_frame); - bits = count_sgrproj_bits(&rsi->sgrproj_info[tile_idx], &ref_sgrproj_info) + &rsi[plane].sgrproj_info[tile_idx].ep, + rsi[plane].sgrproj_info[tile_idx].xqd, cm->rst_internal.tmpbuf); + rsi[plane].restoration_type[tile_idx] = RESTORE_SGRPROJ; + err = try_restoration_tile(src, cpi, rsi, (1 << plane), partial_frame, + tile_idx, 0, 0, dst_frame); + bits = count_sgrproj_bits(&rsi[plane].sgrproj_info[tile_idx], + &ref_sgrproj_info) << AV1_PROB_COST_SHIFT; bits += av1_cost_bit(RESTORE_NONE_SGRPROJ_PROB, 1); cost_sgrproj = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err); @@ -419,35 +503,34 @@ static double search_sgrproj(const 
YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, type[tile_idx] = RESTORE_NONE; } else { type[tile_idx] = RESTORE_SGRPROJ; - memcpy(&sgrproj_info[tile_idx], &rsi->sgrproj_info[tile_idx], + memcpy(&sgrproj_info[tile_idx], &rsi[plane].sgrproj_info[tile_idx], sizeof(sgrproj_info[tile_idx])); - bits = count_sgrproj_bits(&rsi->sgrproj_info[tile_idx], &ref_sgrproj_info) - << AV1_PROB_COST_SHIFT; memcpy(&ref_sgrproj_info, &sgrproj_info[tile_idx], sizeof(ref_sgrproj_info)); best_tile_cost[tile_idx] = err; } - rsi->restoration_type[tile_idx] = RESTORE_NONE; + rsi[plane].restoration_type[tile_idx] = RESTORE_NONE; } // Cost for Sgrproj filtering set_default_sgrproj(&ref_sgrproj_info); - bits = frame_level_restore_bits[rsi->frame_restoration_type] + bits = frame_level_restore_bits[rsi[plane].frame_restoration_type] << AV1_PROB_COST_SHIFT; for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) { bits += av1_cost_bit(RESTORE_NONE_SGRPROJ_PROB, type[tile_idx] != RESTORE_NONE); - memcpy(&rsi->sgrproj_info[tile_idx], &sgrproj_info[tile_idx], + memcpy(&rsi[plane].sgrproj_info[tile_idx], &sgrproj_info[tile_idx], sizeof(sgrproj_info[tile_idx])); if (type[tile_idx] == RESTORE_SGRPROJ) { - bits += - count_sgrproj_bits(&rsi->sgrproj_info[tile_idx], &ref_sgrproj_info) - << AV1_PROB_COST_SHIFT; - memcpy(&ref_sgrproj_info, &rsi->sgrproj_info[tile_idx], + bits += count_sgrproj_bits(&rsi[plane].sgrproj_info[tile_idx], + &ref_sgrproj_info) + << AV1_PROB_COST_SHIFT; + memcpy(&ref_sgrproj_info, &rsi[plane].sgrproj_info[tile_idx], sizeof(ref_sgrproj_info)); } - rsi->restoration_type[tile_idx] = type[tile_idx]; + rsi[plane].restoration_type[tile_idx] = type[tile_idx]; } - err = try_restoration_frame(src, cpi, rsi, 1, partial_frame, dst_frame); + err = try_restoration_frame(src, cpi, rsi, (1 << plane), partial_frame, + dst_frame); cost_sgrproj = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err); return cost_sgrproj; @@ -560,46 +643,6 @@ static void compute_stats_highbd(uint8_t *dgd8, uint8_t *src8, int h_start, } #endif // CONFIG_HIGHBITDEPTH -// Solves Ax = b, where x and b are column vectors -static int linsolve(int n, double *A, int stride, double *b, double *x) { - int i, j, k; - double c; - - aom_clear_system_state(); - - // Forward elimination - for (k = 0; k < n - 1; k++) { - // Bring the largest magitude to the diagonal position - for (i = n - 1; i > k; i--) { - if (fabs(A[(i - 1) * stride + k]) < fabs(A[i * stride + k])) { - for (j = 0; j < n; j++) { - c = A[i * stride + j]; - A[i * stride + j] = A[(i - 1) * stride + j]; - A[(i - 1) * stride + j] = c; - } - c = b[i]; - b[i] = b[i - 1]; - b[i - 1] = c; - } - } - for (i = k; i < n - 1; i++) { - if (fabs(A[k * stride + k]) < 1e-10) return 0; - c = A[(i + 1) * stride + k] / A[k * stride + k]; - for (j = 0; j < n; j++) A[(i + 1) * stride + j] -= c * A[k * stride + j]; - b[i + 1] -= c * b[k]; - } - } - // Backward substitution - for (i = n - 1; i >= 0; i--) { - if (fabs(A[i * stride + i]) < 1e-10) return 0; - c = 0; - for (j = i + 1; j <= n - 1; j++) c += A[i * stride + j] * x[j]; - x[i] = (b[i] - c) / A[i * stride + i]; - } - - return 1; -} - static INLINE int wrap_index(int i) { return (i >= WIENER_HALFWIN1 ? 
WIENER_WIN - 1 - i : i); } @@ -696,8 +739,10 @@ static void update_b_sep_sym(double **Mc, double **Hc, double *a, double *b) { static int wiener_decompose_sep_sym(double *M, double *H, double *a, double *b) { - static const double init_filt[WIENER_WIN] = { - 0.035623, -0.127154, 0.211436, 0.760190, 0.211436, -0.127154, 0.035623, + static const int init_filt[WIENER_WIN] = { + WIENER_FILT_TAP0_MIDV, WIENER_FILT_TAP1_MIDV, WIENER_FILT_TAP2_MIDV, + WIENER_FILT_TAP3_MIDV, WIENER_FILT_TAP2_MIDV, WIENER_FILT_TAP1_MIDV, + WIENER_FILT_TAP0_MIDV, }; int i, j, iter; double *Hc[WIENER_WIN2]; @@ -709,8 +754,9 @@ static int wiener_decompose_sep_sym(double *M, double *H, double *a, H + i * WIENER_WIN * WIENER_WIN2 + j * WIENER_WIN; } } - memcpy(a, init_filt, sizeof(*a) * WIENER_WIN); - memcpy(b, init_filt, sizeof(*b) * WIENER_WIN); + for (i = 0; i < WIENER_WIN; i++) { + a[i] = b[i] = (double)init_filt[i] / WIENER_FILT_STEP; + } iter = 1; while (iter < NUM_WIENER_ITERS) { @@ -812,158 +858,117 @@ static int count_wiener_bits(WienerInfo *wiener_info, return bits; } -static double search_wiener_uv(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, - int partial_frame, int plane, - RestorationInfo *info, RestorationType *type, - YV12_BUFFER_CONFIG *dst_frame) { - WienerInfo *wiener_info = info->wiener_info; - AV1_COMMON *const cm = &cpi->common; - RestorationInfo *rsi = cpi->rst_search; - int64_t err; - int bits; - double cost_wiener, cost_norestore, cost_wiener_frame, cost_norestore_frame; - MACROBLOCK *x = &cpi->td.mb; - double M[WIENER_WIN2]; - double H[WIENER_WIN2 * WIENER_WIN2]; - double vfilterd[WIENER_WIN], hfilterd[WIENER_WIN]; - const YV12_BUFFER_CONFIG *dgd = cm->frame_to_show; - const int width = src->uv_crop_width; - const int height = src->uv_crop_height; - const int src_stride = src->uv_stride; - const int dgd_stride = dgd->uv_stride; - double score; - int tile_idx, tile_width, tile_height, nhtiles, nvtiles; - int h_start, h_end, v_start, v_end; - const int ntiles = - av1_get_rest_ntiles(width, height, cm->rst_info[1].restoration_tilesize, - &tile_width, &tile_height, &nhtiles, &nvtiles); - WienerInfo ref_wiener_info; - set_default_wiener(&ref_wiener_info); - assert(width == dgd->uv_crop_width); - assert(height == dgd->uv_crop_height); - - rsi[plane].frame_restoration_type = RESTORE_NONE; - err = sse_restoration_frame(cm, src, cm->frame_to_show, (1 << plane)); - bits = 0; - cost_norestore_frame = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err); - - rsi[plane].frame_restoration_type = RESTORE_WIENER; - - for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) { - rsi[plane].restoration_type[tile_idx] = RESTORE_NONE; - } - - // Compute best Wiener filters for each tile - for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) { - av1_get_rest_tile_limits(tile_idx, 0, 0, nhtiles, nvtiles, tile_width, - tile_height, width, height, 0, 0, &h_start, &h_end, - &v_start, &v_end); - err = sse_restoration_tile(src, cm->frame_to_show, cm, h_start, - h_end - h_start, v_start, v_end - v_start, - 1 << plane); - // #bits when a tile is not restored - bits = av1_cost_bit(RESTORE_NONE_WIENER_PROB, 0); - cost_norestore = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err); - // best_tile_cost[tile_idx] = DBL_MAX; - - av1_get_rest_tile_limits(tile_idx, 0, 0, nhtiles, nvtiles, tile_width, - tile_height, width, height, WIENER_HALFWIN, - WIENER_HALFWIN, &h_start, &h_end, &v_start, - &v_end); - if (plane == AOM_PLANE_U) { -#if CONFIG_HIGHBITDEPTH - if (cm->use_highbitdepth) - compute_stats_highbd(dgd->u_buffer, src->u_buffer, h_start, 
h_end, - v_start, v_end, dgd_stride, src_stride, M, H); - else -#endif // CONFIG_HIGHBITDEPTH - compute_stats(dgd->u_buffer, src->u_buffer, h_start, h_end, v_start, - v_end, dgd_stride, src_stride, M, H); - } else if (plane == AOM_PLANE_V) { -#if CONFIG_HIGHBITDEPTH - if (cm->use_highbitdepth) - compute_stats_highbd(dgd->v_buffer, src->v_buffer, h_start, h_end, - v_start, v_end, dgd_stride, src_stride, M, H); - else -#endif // CONFIG_HIGHBITDEPTH - compute_stats(dgd->v_buffer, src->v_buffer, h_start, h_end, v_start, - v_end, dgd_stride, src_stride, M, H); - } else { - assert(0); - } - - type[tile_idx] = RESTORE_WIENER; - - if (!wiener_decompose_sep_sym(M, H, vfilterd, hfilterd)) { - type[tile_idx] = RESTORE_NONE; - continue; - } - quantize_sym_filter(vfilterd, rsi[plane].wiener_info[tile_idx].vfilter); - quantize_sym_filter(hfilterd, rsi[plane].wiener_info[tile_idx].hfilter); - - // Filter score computes the value of the function x'*A*x - x'*b for the - // learned filter and compares it against identity filer. If there is no - // reduction in the function, the filter is reverted back to identity - score = compute_score(M, H, rsi[plane].wiener_info[tile_idx].vfilter, - rsi[plane].wiener_info[tile_idx].hfilter); - if (score > 0.0) { - type[tile_idx] = RESTORE_NONE; - continue; - } - - rsi[plane].restoration_type[tile_idx] = RESTORE_WIENER; - err = try_restoration_tile(src, cpi, rsi, 1 << plane, partial_frame, - tile_idx, 0, 0, dst_frame); - bits = - count_wiener_bits(&rsi[plane].wiener_info[tile_idx], &ref_wiener_info) - << AV1_PROB_COST_SHIFT; - // bits = WIENER_FILT_BITS << AV1_PROB_COST_SHIFT; - bits += av1_cost_bit(RESTORE_NONE_WIENER_PROB, 1); - cost_wiener = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err); - if (cost_wiener >= cost_norestore) { - type[tile_idx] = RESTORE_NONE; - } else { - type[tile_idx] = RESTORE_WIENER; - memcpy(&wiener_info[tile_idx], &rsi[plane].wiener_info[tile_idx], - sizeof(wiener_info[tile_idx])); - memcpy(&ref_wiener_info, &rsi[plane].wiener_info[tile_idx], - sizeof(ref_wiener_info)); +#define USE_WIENER_REFINEMENT_SEARCH 1 +static int64_t finer_tile_search_wiener(const YV12_BUFFER_CONFIG *src, + AV1_COMP *cpi, RestorationInfo *rsi, + int start_step, int plane, int tile_idx, + int partial_frame, + YV12_BUFFER_CONFIG *dst_frame) { + int64_t err = try_restoration_tile(src, cpi, rsi, 1 << plane, partial_frame, + tile_idx, 0, 0, dst_frame); + (void)start_step; +#if USE_WIENER_REFINEMENT_SEARCH + int64_t err2; + int tap_min[] = { WIENER_FILT_TAP0_MINV, WIENER_FILT_TAP1_MINV, + WIENER_FILT_TAP2_MINV }; + int tap_max[] = { WIENER_FILT_TAP0_MAXV, WIENER_FILT_TAP1_MAXV, + WIENER_FILT_TAP2_MAXV }; + // printf("err pre = %"PRId64"\n", err); + for (int s = start_step; s >= 1; s >>= 1) { + for (int p = 0; p < WIENER_HALFWIN; ++p) { + int skip = 0; + do { + if (rsi[plane].wiener_info[tile_idx].hfilter[p] - s >= tap_min[p]) { + rsi[plane].wiener_info[tile_idx].hfilter[p] -= s; + rsi[plane].wiener_info[tile_idx].hfilter[WIENER_WIN - p - 1] -= s; + rsi[plane].wiener_info[tile_idx].hfilter[WIENER_HALFWIN] += 2 * s; + err2 = try_restoration_tile(src, cpi, rsi, 1 << plane, partial_frame, + tile_idx, 0, 0, dst_frame); + if (err2 > err) { + rsi[plane].wiener_info[tile_idx].hfilter[p] += s; + rsi[plane].wiener_info[tile_idx].hfilter[WIENER_WIN - p - 1] += s; + rsi[plane].wiener_info[tile_idx].hfilter[WIENER_HALFWIN] -= 2 * s; + } else { + err = err2; + skip = 1; + // At the highest step size continue moving in the same direction + if (s == start_step) continue; + } + } + break; + 
} while (1); + if (skip) break; + do { + if (rsi[plane].wiener_info[tile_idx].hfilter[p] + s <= tap_max[p]) { + rsi[plane].wiener_info[tile_idx].hfilter[p] += s; + rsi[plane].wiener_info[tile_idx].hfilter[WIENER_WIN - p - 1] += s; + rsi[plane].wiener_info[tile_idx].hfilter[WIENER_HALFWIN] -= 2 * s; + err2 = try_restoration_tile(src, cpi, rsi, 1 << plane, partial_frame, + tile_idx, 0, 0, dst_frame); + if (err2 > err) { + rsi[plane].wiener_info[tile_idx].hfilter[p] -= s; + rsi[plane].wiener_info[tile_idx].hfilter[WIENER_WIN - p - 1] -= s; + rsi[plane].wiener_info[tile_idx].hfilter[WIENER_HALFWIN] += 2 * s; + } else { + err = err2; + // At the highest step size continue moving in the same direction + if (s == start_step) continue; + } + } + break; + } while (1); } - rsi[plane].restoration_type[tile_idx] = RESTORE_NONE; - } - // Cost for Wiener filtering - set_default_wiener(&ref_wiener_info); - bits = 0; - for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) { - bits += - av1_cost_bit(RESTORE_NONE_WIENER_PROB, type[tile_idx] != RESTORE_NONE); - memcpy(&rsi[plane].wiener_info[tile_idx], &wiener_info[tile_idx], - sizeof(wiener_info[tile_idx])); - if (type[tile_idx] == RESTORE_WIENER) { - bits += - count_wiener_bits(&rsi[plane].wiener_info[tile_idx], &ref_wiener_info) - << AV1_PROB_COST_SHIFT; - memcpy(&ref_wiener_info, &rsi[plane].wiener_info[tile_idx], - sizeof(ref_wiener_info)); + for (int p = 0; p < WIENER_HALFWIN; ++p) { + int skip = 0; + do { + if (rsi[plane].wiener_info[tile_idx].vfilter[p] - s >= tap_min[p]) { + rsi[plane].wiener_info[tile_idx].vfilter[p] -= s; + rsi[plane].wiener_info[tile_idx].vfilter[WIENER_WIN - p - 1] -= s; + rsi[plane].wiener_info[tile_idx].vfilter[WIENER_HALFWIN] += 2 * s; + err2 = try_restoration_tile(src, cpi, rsi, 1 << plane, partial_frame, + tile_idx, 0, 0, dst_frame); + if (err2 > err) { + rsi[plane].wiener_info[tile_idx].vfilter[p] += s; + rsi[plane].wiener_info[tile_idx].vfilter[WIENER_WIN - p - 1] += s; + rsi[plane].wiener_info[tile_idx].vfilter[WIENER_HALFWIN] -= 2 * s; + } else { + err = err2; + skip = 1; + // At the highest step size continue moving in the same direction + if (s == start_step) continue; + } + } + break; + } while (1); + if (skip) break; + do { + if (rsi[plane].wiener_info[tile_idx].vfilter[p] + s <= tap_max[p]) { + rsi[plane].wiener_info[tile_idx].vfilter[p] += s; + rsi[plane].wiener_info[tile_idx].vfilter[WIENER_WIN - p - 1] += s; + rsi[plane].wiener_info[tile_idx].vfilter[WIENER_HALFWIN] -= 2 * s; + err2 = try_restoration_tile(src, cpi, rsi, 1 << plane, partial_frame, + tile_idx, 0, 0, dst_frame); + if (err2 > err) { + rsi[plane].wiener_info[tile_idx].vfilter[p] -= s; + rsi[plane].wiener_info[tile_idx].vfilter[WIENER_WIN - p - 1] -= s; + rsi[plane].wiener_info[tile_idx].vfilter[WIENER_HALFWIN] += 2 * s; + } else { + err = err2; + // At the highest step size continue moving in the same direction + if (s == start_step) continue; + } + } + break; + } while (1); } - rsi[plane].restoration_type[tile_idx] = type[tile_idx]; - } - err = try_restoration_frame(src, cpi, rsi, 1 << plane, partial_frame, - dst_frame); - cost_wiener_frame = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err); - - if (cost_wiener_frame < cost_norestore_frame) { - info->frame_restoration_type = RESTORE_WIENER; - } else { - info->frame_restoration_type = RESTORE_NONE; } - - return info->frame_restoration_type == RESTORE_WIENER ? 
cost_wiener_frame - : cost_norestore_frame; +// printf("err post = %"PRId64"\n", err); +#endif // USE_WIENER_REFINEMENT_SEARCH + return err; } static double search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, - int partial_frame, RestorationInfo *info, + int partial_frame, int plane, RestorationInfo *info, RestorationType *type, double *best_tile_cost, YV12_BUFFER_CONFIG *dst_frame) { WienerInfo *wiener_info = info->wiener_info; @@ -977,38 +982,52 @@ static double search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, double H[WIENER_WIN2 * WIENER_WIN2]; double vfilterd[WIENER_WIN], hfilterd[WIENER_WIN]; const YV12_BUFFER_CONFIG *dgd = cm->frame_to_show; - const int width = cm->width; - const int height = cm->height; - const int src_stride = src->y_stride; - const int dgd_stride = dgd->y_stride; + int width, height, src_stride, dgd_stride; + uint8_t *dgd_buffer, *src_buffer; + if (plane == AOM_PLANE_Y) { + width = cm->width; + height = cm->height; + src_buffer = src->y_buffer; + src_stride = src->y_stride; + dgd_buffer = dgd->y_buffer; + dgd_stride = dgd->y_stride; + assert(width == dgd->y_crop_width); + assert(height == dgd->y_crop_height); + assert(width == src->y_crop_width); + assert(height == src->y_crop_height); + } else { + width = src->uv_crop_width; + height = src->uv_crop_height; + src_stride = src->uv_stride; + dgd_stride = dgd->uv_stride; + src_buffer = plane == AOM_PLANE_U ? src->u_buffer : src->v_buffer; + dgd_buffer = plane == AOM_PLANE_U ? dgd->u_buffer : dgd->v_buffer; + assert(width == dgd->uv_crop_width); + assert(height == dgd->uv_crop_height); + } double score; int tile_idx, tile_width, tile_height, nhtiles, nvtiles; int h_start, h_end, v_start, v_end; - const int ntiles = - av1_get_rest_ntiles(width, height, cm->rst_info[0].restoration_tilesize, - &tile_width, &tile_height, &nhtiles, &nvtiles); + const int ntiles = av1_get_rest_ntiles( + width, height, cm->rst_info[plane].restoration_tilesize, &tile_width, + &tile_height, &nhtiles, &nvtiles); WienerInfo ref_wiener_info; set_default_wiener(&ref_wiener_info); - assert(width == dgd->y_crop_width); - assert(height == dgd->y_crop_height); - assert(width == src->y_crop_width); - assert(height == src->y_crop_height); - - rsi->frame_restoration_type = RESTORE_WIENER; + rsi[plane].frame_restoration_type = RESTORE_WIENER; for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) { - rsi->restoration_type[tile_idx] = RESTORE_NONE; + rsi[plane].restoration_type[tile_idx] = RESTORE_NONE; } // Construct a (WIENER_HALFWIN)-pixel border around the frame #if CONFIG_HIGHBITDEPTH if (cm->use_highbitdepth) - extend_frame_highbd(CONVERT_TO_SHORTPTR(dgd->y_buffer), width, height, + extend_frame_highbd(CONVERT_TO_SHORTPTR(dgd_buffer), width, height, dgd_stride); else #endif - extend_frame(dgd->y_buffer, width, height, dgd_stride); + extend_frame(dgd_buffer, width, height, dgd_stride); // Compute best Wiener filters for each tile for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) { @@ -1016,7 +1035,8 @@ static double search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, tile_height, width, height, 0, 0, &h_start, &h_end, &v_start, &v_end); err = sse_restoration_tile(src, cm->frame_to_show, cm, h_start, - h_end - h_start, v_start, v_end - v_start, 1); + h_end - h_start, v_start, v_end - v_start, + (1 << plane)); // #bits when a tile is not restored bits = av1_cost_bit(RESTORE_NONE_WIENER_PROB, 0); cost_norestore = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err); @@ -1027,12 +1047,12 @@ static double search_wiener(const 
YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, &v_start, &v_end); #if CONFIG_HIGHBITDEPTH if (cm->use_highbitdepth) - compute_stats_highbd(dgd->y_buffer, src->y_buffer, h_start, h_end, - v_start, v_end, dgd_stride, src_stride, M, H); + compute_stats_highbd(dgd_buffer, src_buffer, h_start, h_end, v_start, + v_end, dgd_stride, src_stride, M, H); else #endif // CONFIG_HIGHBITDEPTH - compute_stats(dgd->y_buffer, src->y_buffer, h_start, h_end, v_start, - v_end, dgd_stride, src_stride, M, H); + compute_stats(dgd_buffer, src_buffer, h_start, h_end, v_start, v_end, + dgd_stride, src_stride, M, H); type[tile_idx] = RESTORE_WIENER; @@ -1040,108 +1060,129 @@ static double search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, type[tile_idx] = RESTORE_NONE; continue; } - quantize_sym_filter(vfilterd, rsi->wiener_info[tile_idx].vfilter); - quantize_sym_filter(hfilterd, rsi->wiener_info[tile_idx].hfilter); + quantize_sym_filter(vfilterd, rsi[plane].wiener_info[tile_idx].vfilter); + quantize_sym_filter(hfilterd, rsi[plane].wiener_info[tile_idx].hfilter); // Filter score computes the value of the function x'*A*x - x'*b for the // learned filter and compares it against identity filer. If there is no // reduction in the function, the filter is reverted back to identity - score = compute_score(M, H, rsi->wiener_info[tile_idx].vfilter, - rsi->wiener_info[tile_idx].hfilter); + score = compute_score(M, H, rsi[plane].wiener_info[tile_idx].vfilter, + rsi[plane].wiener_info[tile_idx].hfilter); if (score > 0.0) { type[tile_idx] = RESTORE_NONE; continue; } + aom_clear_system_state(); - rsi->restoration_type[tile_idx] = RESTORE_WIENER; - err = try_restoration_tile(src, cpi, rsi, 1, partial_frame, tile_idx, 0, 0, - dst_frame); - bits = count_wiener_bits(&rsi->wiener_info[tile_idx], &ref_wiener_info) - << AV1_PROB_COST_SHIFT; + rsi[plane].restoration_type[tile_idx] = RESTORE_WIENER; + err = finer_tile_search_wiener(src, cpi, rsi, 4, plane, tile_idx, + partial_frame, dst_frame); + bits = + count_wiener_bits(&rsi[plane].wiener_info[tile_idx], &ref_wiener_info) + << AV1_PROB_COST_SHIFT; bits += av1_cost_bit(RESTORE_NONE_WIENER_PROB, 1); cost_wiener = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err); if (cost_wiener >= cost_norestore) { type[tile_idx] = RESTORE_NONE; } else { type[tile_idx] = RESTORE_WIENER; - memcpy(&wiener_info[tile_idx], &rsi->wiener_info[tile_idx], + memcpy(&wiener_info[tile_idx], &rsi[plane].wiener_info[tile_idx], sizeof(wiener_info[tile_idx])); - memcpy(&ref_wiener_info, &rsi->wiener_info[tile_idx], + memcpy(&ref_wiener_info, &rsi[plane].wiener_info[tile_idx], sizeof(ref_wiener_info)); - bits = count_wiener_bits(&wiener_info[tile_idx], &ref_wiener_info) - << AV1_PROB_COST_SHIFT; best_tile_cost[tile_idx] = err; } - rsi->restoration_type[tile_idx] = RESTORE_NONE; + rsi[plane].restoration_type[tile_idx] = RESTORE_NONE; } // Cost for Wiener filtering set_default_wiener(&ref_wiener_info); - bits = frame_level_restore_bits[rsi->frame_restoration_type] + bits = frame_level_restore_bits[rsi[plane].frame_restoration_type] << AV1_PROB_COST_SHIFT; for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) { bits += av1_cost_bit(RESTORE_NONE_WIENER_PROB, type[tile_idx] != RESTORE_NONE); - memcpy(&rsi->wiener_info[tile_idx], &wiener_info[tile_idx], + memcpy(&rsi[plane].wiener_info[tile_idx], &wiener_info[tile_idx], sizeof(wiener_info[tile_idx])); if (type[tile_idx] == RESTORE_WIENER) { - bits += count_wiener_bits(&rsi->wiener_info[tile_idx], &ref_wiener_info) - << AV1_PROB_COST_SHIFT; - memcpy(&ref_wiener_info, 
&rsi->wiener_info[tile_idx], + bits += + count_wiener_bits(&rsi[plane].wiener_info[tile_idx], &ref_wiener_info) + << AV1_PROB_COST_SHIFT; + memcpy(&ref_wiener_info, &rsi[plane].wiener_info[tile_idx], sizeof(ref_wiener_info)); } - rsi->restoration_type[tile_idx] = type[tile_idx]; + rsi[plane].restoration_type[tile_idx] = type[tile_idx]; } - err = try_restoration_frame(src, cpi, rsi, 1, partial_frame, dst_frame); + err = try_restoration_frame(src, cpi, rsi, 1 << plane, partial_frame, + dst_frame); cost_wiener = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err); return cost_wiener; } static double search_norestore(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, - int partial_frame, RestorationInfo *info, - RestorationType *type, double *best_tile_cost, + int partial_frame, int plane, + RestorationInfo *info, RestorationType *type, + double *best_tile_cost, YV12_BUFFER_CONFIG *dst_frame) { - double err, cost_norestore; + int64_t err; + double cost_norestore; int bits; MACROBLOCK *x = &cpi->td.mb; AV1_COMMON *const cm = &cpi->common; int tile_idx, tile_width, tile_height, nhtiles, nvtiles; int h_start, h_end, v_start, v_end; + int width, height; + if (plane == AOM_PLANE_Y) { + width = cm->width; + height = cm->height; + } else { + width = src->uv_crop_width; + height = src->uv_crop_height; + } const int ntiles = av1_get_rest_ntiles( - cm->width, cm->height, cm->rst_info[0].restoration_tilesize, &tile_width, + width, height, cm->rst_info[plane].restoration_tilesize, &tile_width, &tile_height, &nhtiles, &nvtiles); (void)info; (void)dst_frame; (void)partial_frame; + info->frame_restoration_type = RESTORE_NONE; for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) { av1_get_rest_tile_limits(tile_idx, 0, 0, nhtiles, nvtiles, tile_width, - tile_height, cm->width, cm->height, 0, 0, &h_start, - &h_end, &v_start, &v_end); + tile_height, width, height, 0, 0, &h_start, &h_end, + &v_start, &v_end); err = sse_restoration_tile(src, cm->frame_to_show, cm, h_start, - h_end - h_start, v_start, v_end - v_start, 1); + h_end - h_start, v_start, v_end - v_start, + 1 << plane); type[tile_idx] = RESTORE_NONE; best_tile_cost[tile_idx] = err; } // RD cost associated with no restoration - err = sse_restoration_tile(src, cm->frame_to_show, cm, 0, cm->width, 0, - cm->height, 1); + err = sse_restoration_frame(cm, src, cm->frame_to_show, (1 << plane)); bits = frame_level_restore_bits[RESTORE_NONE] << AV1_PROB_COST_SHIFT; cost_norestore = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err); return cost_norestore; } static double search_switchable_restoration( - AV1_COMP *cpi, int partial_frame, RestorationInfo *rsi, + AV1_COMP *cpi, int partial_frame, int plane, RestorationInfo *rsi, double *tile_cost[RESTORE_SWITCHABLE_TYPES]) { AV1_COMMON *const cm = &cpi->common; MACROBLOCK *x = &cpi->td.mb; double cost_switchable = 0; int bits, tile_idx; RestorationType r; - const int ntiles = av1_get_rest_ntiles(cm->width, cm->height, - cm->rst_info[0].restoration_tilesize, - NULL, NULL, NULL, NULL); + int width, height; + if (plane == AOM_PLANE_Y) { + width = cm->width; + height = cm->height; + } else { + width = ROUND_POWER_OF_TWO(cm->width, cm->subsampling_x); + height = ROUND_POWER_OF_TWO(cm->height, cm->subsampling_y); + } + const int ntiles = av1_get_rest_ntiles( + width, height, cm->rst_info[plane].restoration_tilesize, NULL, NULL, NULL, + NULL); SgrprojInfo ref_sgrproj_info; set_default_sgrproj(&ref_sgrproj_info); WienerInfo ref_wiener_info; @@ -1203,57 +1244,60 @@ void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP 
*cpi, double best_cost_restore; RestorationType r, best_restore; - const int ntiles = av1_get_rest_ntiles(cm->width, cm->height, - cm->rst_info[0].restoration_tilesize, - NULL, NULL, NULL, NULL); + const int ntiles_y = av1_get_rest_ntiles(cm->width, cm->height, + cm->rst_info[0].restoration_tilesize, + NULL, NULL, NULL, NULL); + const int ntiles_uv = av1_get_rest_ntiles( + ROUND_POWER_OF_TWO(cm->width, cm->subsampling_x), + ROUND_POWER_OF_TWO(cm->height, cm->subsampling_y), + cm->rst_info[1].restoration_tilesize, NULL, NULL, NULL, NULL); + // Assume ntiles_uv is never larger than ntiles_y and so the same arrays work. for (r = 0; r < RESTORE_SWITCHABLE_TYPES; r++) { - tile_cost[r] = (double *)aom_malloc(sizeof(*tile_cost[0]) * ntiles); + tile_cost[r] = (double *)aom_malloc(sizeof(*tile_cost[0]) * ntiles_y); restore_types[r] = - (RestorationType *)aom_malloc(sizeof(*restore_types[0]) * ntiles); - } - - for (r = 0; r < RESTORE_SWITCHABLE_TYPES; ++r) { - if (force_restore_type != 0) - if (r != RESTORE_NONE && r != force_restore_type) continue; - cost_restore[r] = search_restore_fun[r]( - src, cpi, method == LPF_PICK_FROM_SUBIMAGE, &cm->rst_info[0], - restore_types[r], tile_cost[r], &cpi->trial_frame_rst); + (RestorationType *)aom_malloc(sizeof(*restore_types[0]) * ntiles_y); } - cost_restore[RESTORE_SWITCHABLE] = search_switchable_restoration( - cpi, method == LPF_PICK_FROM_SUBIMAGE, &cm->rst_info[0], tile_cost); - best_cost_restore = DBL_MAX; - best_restore = 0; - for (r = 0; r < RESTORE_TYPES; ++r) { + for (int plane = AOM_PLANE_Y; plane <= AOM_PLANE_V; ++plane) { + for (r = 0; r < RESTORE_SWITCHABLE_TYPES; ++r) { + cost_restore[r] = DBL_MAX; + if (force_restore_type != 0) + if (r != RESTORE_NONE && r != force_restore_type) continue; + cost_restore[r] = + search_restore_fun[r](src, cpi, method == LPF_PICK_FROM_SUBIMAGE, + plane, &cm->rst_info[plane], restore_types[r], + tile_cost[r], &cpi->trial_frame_rst); + } + if (plane == AOM_PLANE_Y) + cost_restore[RESTORE_SWITCHABLE] = + search_switchable_restoration(cpi, method == LPF_PICK_FROM_SUBIMAGE, + plane, &cm->rst_info[plane], tile_cost); + else + cost_restore[RESTORE_SWITCHABLE] = DBL_MAX; + best_cost_restore = DBL_MAX; + best_restore = 0; + for (r = 0; r < RESTORE_TYPES; ++r) { + if (force_restore_type != 0) + if (r != RESTORE_NONE && r != force_restore_type) continue; + if (cost_restore[r] < best_cost_restore) { + best_restore = r; + best_cost_restore = cost_restore[r]; + } + } + cm->rst_info[plane].frame_restoration_type = best_restore; if (force_restore_type != 0) - if (r != RESTORE_NONE && r != force_restore_type) continue; - if (cost_restore[r] < best_cost_restore) { - best_restore = r; - best_cost_restore = cost_restore[r]; + assert(best_restore == force_restore_type || + best_restore == RESTORE_NONE); + if (best_restore != RESTORE_SWITCHABLE) { + const int nt = (plane == AOM_PLANE_Y ? 
ntiles_y : ntiles_uv); + memcpy(cm->rst_info[plane].restoration_type, restore_types[best_restore], + nt * sizeof(restore_types[best_restore][0])); } } - cm->rst_info[0].frame_restoration_type = best_restore; - if (force_restore_type != 0) - assert(best_restore == force_restore_type || best_restore == RESTORE_NONE); - if (best_restore != RESTORE_SWITCHABLE) { - memcpy(cm->rst_info[0].restoration_type, restore_types[best_restore], - ntiles * sizeof(restore_types[best_restore][0])); - } - - // Color components - search_wiener_uv(src, cpi, method == LPF_PICK_FROM_SUBIMAGE, AOM_PLANE_U, - &cm->rst_info[AOM_PLANE_U], - cm->rst_info[AOM_PLANE_U].restoration_type, - &cpi->trial_frame_rst); - search_wiener_uv(src, cpi, method == LPF_PICK_FROM_SUBIMAGE, AOM_PLANE_V, - &cm->rst_info[AOM_PLANE_V], - cm->rst_info[AOM_PLANE_V].restoration_type, - &cpi->trial_frame_rst); /* - printf("Frame %d/%d restore types: %d %d %d\n", - cm->current_video_frame, cm->show_frame, - cm->rst_info[0].frame_restoration_type, + printf("Frame %d/%d restore types: %d %d %d\n", cm->current_video_frame, + cm->show_frame, cm->rst_info[0].frame_restoration_type, cm->rst_info[1].frame_restoration_type, cm->rst_info[2].frame_restoration_type); printf("Frame %d/%d frame_restore_type %d : %f %f %f %f\n", diff --git a/third_party/aom/av1/encoder/pvq_encoder.c b/third_party/aom/av1/encoder/pvq_encoder.c index ab63f1b7d..9d5133012 100644 --- a/third_party/aom/av1/encoder/pvq_encoder.c +++ b/third_party/aom/av1/encoder/pvq_encoder.c @@ -247,23 +247,23 @@ static double od_pvq_rate(int qg, int icgr, int theta, int ts, aom_writer w; od_pvq_codeword_ctx cd; int tell; -#if CONFIG_DAALA_EC +#if !CONFIG_ANS od_ec_enc_init(&w.ec, 1000); #else -# error "CONFIG_PVQ currently requires CONFIG_DAALA_EC." +# error "CONFIG_PVQ currently requires !CONFIG_ANS." #endif OD_COPY(&cd, &adapt->pvq.pvq_codeword_ctx, 1); -#if CONFIG_DAALA_EC +#if !CONFIG_ANS tell = od_ec_enc_tell_frac(&w.ec); #else -# error "CONFIG_PVQ currently requires CONFIG_DAALA_EC." +# error "CONFIG_PVQ currently requires !CONFIG_ANS." #endif aom_encode_pvq_codeword(&w, &cd, y0, n - (theta != -1), k); -#if CONFIG_DAALA_EC +#if !CONFIG_ANS rate = (od_ec_enc_tell_frac(&w.ec)-tell)/8.; od_ec_enc_clear(&w.ec); #else -# error "CONFIG_PVQ currently requires CONFIG_DAALA_EC." +# error "CONFIG_PVQ currently requires !CONFIG_ANS." #endif } if (qg > 0 && theta >= 0) { @@ -847,22 +847,22 @@ PVQ_SKIP_TYPE od_pvq_encode(daala_enc_ctx *enc, int tell2; od_rollback_buffer dc_buf; - dc_rate = -OD_LOG2((double)(skip_cdf[3] - skip_cdf[2])/ - (double)(skip_cdf[2] - skip_cdf[1])); + dc_rate = -OD_LOG2((double)(OD_ICDF(skip_cdf[3]) - OD_ICDF(skip_cdf[2]))/ + (double)(OD_ICDF(skip_cdf[2]) - OD_ICDF(skip_cdf[1]))); dc_rate += 1; -#if CONFIG_DAALA_EC +#if !CONFIG_ANS tell2 = od_ec_enc_tell_frac(&enc->w.ec); #else -#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC." +#error "CONFIG_PVQ currently requires !CONFIG_ANS." #endif od_encode_checkpoint(enc, &dc_buf); generic_encode(&enc->w, &enc->state.adapt->model_dc[pli], n - 1, &enc->state.adapt->ex_dc[pli][bs][0], 2); -#if CONFIG_DAALA_EC +#if !CONFIG_ANS tell2 = od_ec_enc_tell_frac(&enc->w.ec) - tell2; #else -#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC." +#error "CONFIG_PVQ currently requires !CONFIG_ANS." 
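A minimal, generic sketch of what the dc_rate and skip_rate expressions in these pvq_encoder.c hunks compute: a symbol's cost in bits is -log2 of a probability read off the skip CDF, with the table reads now wrapped in OD_ICDF (presumably to account for a change in how the CDF is stored). The helper below is illustrative only and is not the library's macros:

#include <math.h>

/* Cost in bits of symbol s given cumulative frequencies cum[0..n],
 * where cum[0] == 0 and cum[n] is the total count. */
static double symbol_rate_bits(const unsigned *cum, int s, int n) {
  const double p = (double)(cum[s + 1] - cum[s]) / (double)cum[n];
  return -log2(p); /* e.g. p == 0.25 -> 2.0 bits */
}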
#endif dc_rate += tell2/8.0; od_encode_rollback(enc, &dc_buf); @@ -871,10 +871,10 @@ PVQ_SKIP_TYPE od_pvq_encode(daala_enc_ctx *enc, enc->pvq_norm_lambda); } } -#if CONFIG_DAALA_EC +#if !CONFIG_ANS tell = od_ec_enc_tell_frac(&enc->w.ec); #else -#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC." +#error "CONFIG_PVQ currently requires !CONFIG_ANS." #endif /* Code as if we're not skipping. */ aom_write_symbol(&enc->w, 2 + (out[0] != 0), skip_cdf, 4); @@ -921,22 +921,22 @@ PVQ_SKIP_TYPE od_pvq_encode(daala_enc_ctx *enc, } if (encode_flip) cfl_encoded = 1; } -#if CONFIG_DAALA_EC +#if !CONFIG_ANS tell = od_ec_enc_tell_frac(&enc->w.ec) - tell; #else -#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC." +#error "CONFIG_PVQ currently requires !CONFIG_ANS." #endif /* Account for the rate of skipping the AC, based on the same DC decision we made when trying to not skip AC. */ { double skip_rate; if (out[0] != 0) { - skip_rate = -OD_LOG2((skip_cdf[1] - skip_cdf[0])/ - (double)skip_cdf[3]); + skip_rate = -OD_LOG2((OD_ICDF(skip_cdf[1]) - OD_ICDF(skip_cdf[0]))/ + (double)OD_ICDF(skip_cdf[3])); } else { - skip_rate = -OD_LOG2(skip_cdf[0]/ - (double)skip_cdf[3]); + skip_rate = -OD_LOG2(OD_ICDF(skip_cdf[0])/ + (double)OD_ICDF(skip_cdf[3])); } tell -= (int)floor(.5+8*skip_rate); } @@ -951,22 +951,22 @@ PVQ_SKIP_TYPE od_pvq_encode(daala_enc_ctx *enc, int tell2; od_rollback_buffer dc_buf; - dc_rate = -OD_LOG2((double)(skip_cdf[1] - skip_cdf[0])/ - (double)skip_cdf[0]); + dc_rate = -OD_LOG2((double)(OD_ICDF(skip_cdf[1]) - OD_ICDF(skip_cdf[0]))/ + (double)OD_ICDF(skip_cdf[0])); dc_rate += 1; -#if CONFIG_DAALA_EC +#if !CONFIG_ANS tell2 = od_ec_enc_tell_frac(&enc->w.ec); #else -#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC." +#error "CONFIG_PVQ currently requires !CONFIG_ANS." #endif od_encode_checkpoint(enc, &dc_buf); generic_encode(&enc->w, &enc->state.adapt->model_dc[pli], n - 1, &enc->state.adapt->ex_dc[pli][bs][0], 2); -#if CONFIG_DAALA_EC +#if !CONFIG_ANS tell2 = od_ec_enc_tell_frac(&enc->w.ec) - tell2; #else -#error "CONFIG_PVQ currently requires CONFIG_DAALA_EC." +#error "CONFIG_PVQ currently requires !CONFIG_ANS." #endif dc_rate += tell2/8.0; od_encode_rollback(enc, &dc_buf); diff --git a/third_party/aom/av1/encoder/ransac.c b/third_party/aom/av1/encoder/ransac.c index 5d5dd7572..bbd2d179c 100644 --- a/third_party/aom/av1/encoder/ransac.c +++ b/third_party/aom/av1/encoder/ransac.c @@ -8,7 +8,6 @@ * Media Patent License 1.0 was not distributed with this source code in the * PATENTS file, you can obtain it at www.aomedia.org/license/patent. */ -#define _POSIX_C_SOURCE 200112L // rand_r() #include <memory.h> #include <math.h> #include <time.h> @@ -17,6 +16,7 @@ #include <assert.h> #include "av1/encoder/ransac.h" +#include "av1/encoder/mathutils.h" #define MAX_MINPTS 4 #define MAX_DEGENERATE_ITER 10 @@ -133,309 +133,6 @@ static void project_points_double_homography(double *mat, double *points, } } -/////////////////////////////////////////////////////////////////////////////// -// svdcmp -// Adopted from Numerical Recipes in C - -static const double TINY_NEAR_ZERO = 1.0E-12; - -static INLINE double sign(double a, double b) { - return ((b) >= 0 ? fabs(a) : -fabs(a)); -} - -static INLINE double pythag(double a, double b) { - double ct; - const double absa = fabs(a); - const double absb = fabs(b); - - if (absa > absb) { - ct = absb / absa; - return absa * sqrt(1.0 + ct * ct); - } else { - ct = absa / absb; - return (absb == 0) ? 
0 : absb * sqrt(1.0 + ct * ct); - } -} - -static void multiply_mat(const double *m1, const double *m2, double *res, - const int m1_rows, const int inner_dim, - const int m2_cols) { - double sum; - - int row, col, inner; - for (row = 0; row < m1_rows; ++row) { - for (col = 0; col < m2_cols; ++col) { - sum = 0; - for (inner = 0; inner < inner_dim; ++inner) - sum += m1[row * inner_dim + inner] * m2[inner * m2_cols + col]; - *(res++) = sum; - } - } -} - -static int svdcmp(double **u, int m, int n, double w[], double **v) { - const int max_its = 30; - int flag, i, its, j, jj, k, l, nm; - double anorm, c, f, g, h, s, scale, x, y, z; - double *rv1 = (double *)aom_malloc(sizeof(*rv1) * (n + 1)); - g = scale = anorm = 0.0; - for (i = 0; i < n; i++) { - l = i + 1; - rv1[i] = scale * g; - g = s = scale = 0.0; - if (i < m) { - for (k = i; k < m; k++) scale += fabs(u[k][i]); - if (scale != 0.) { - for (k = i; k < m; k++) { - u[k][i] /= scale; - s += u[k][i] * u[k][i]; - } - f = u[i][i]; - g = -sign(sqrt(s), f); - h = f * g - s; - u[i][i] = f - g; - for (j = l; j < n; j++) { - for (s = 0.0, k = i; k < m; k++) s += u[k][i] * u[k][j]; - f = s / h; - for (k = i; k < m; k++) u[k][j] += f * u[k][i]; - } - for (k = i; k < m; k++) u[k][i] *= scale; - } - } - w[i] = scale * g; - g = s = scale = 0.0; - if (i < m && i != n - 1) { - for (k = l; k < n; k++) scale += fabs(u[i][k]); - if (scale != 0.) { - for (k = l; k < n; k++) { - u[i][k] /= scale; - s += u[i][k] * u[i][k]; - } - f = u[i][l]; - g = -sign(sqrt(s), f); - h = f * g - s; - u[i][l] = f - g; - for (k = l; k < n; k++) rv1[k] = u[i][k] / h; - for (j = l; j < m; j++) { - for (s = 0.0, k = l; k < n; k++) s += u[j][k] * u[i][k]; - for (k = l; k < n; k++) u[j][k] += s * rv1[k]; - } - for (k = l; k < n; k++) u[i][k] *= scale; - } - } - anorm = fmax(anorm, (fabs(w[i]) + fabs(rv1[i]))); - } - - for (i = n - 1; i >= 0; i--) { - if (i < n - 1) { - if (g != 0.) { - for (j = l; j < n; j++) v[j][i] = (u[i][j] / u[i][l]) / g; - for (j = l; j < n; j++) { - for (s = 0.0, k = l; k < n; k++) s += u[i][k] * v[k][j]; - for (k = l; k < n; k++) v[k][j] += s * v[k][i]; - } - } - for (j = l; j < n; j++) v[i][j] = v[j][i] = 0.0; - } - v[i][i] = 1.0; - g = rv1[i]; - l = i; - } - for (i = AOMMIN(m, n) - 1; i >= 0; i--) { - l = i + 1; - g = w[i]; - for (j = l; j < n; j++) u[i][j] = 0.0; - if (g != 0.) 
{ - g = 1.0 / g; - for (j = l; j < n; j++) { - for (s = 0.0, k = l; k < m; k++) s += u[k][i] * u[k][j]; - f = (s / u[i][i]) * g; - for (k = i; k < m; k++) u[k][j] += f * u[k][i]; - } - for (j = i; j < m; j++) u[j][i] *= g; - } else { - for (j = i; j < m; j++) u[j][i] = 0.0; - } - ++u[i][i]; - } - for (k = n - 1; k >= 0; k--) { - for (its = 0; its < max_its; its++) { - flag = 1; - for (l = k; l >= 0; l--) { - nm = l - 1; - if ((double)(fabs(rv1[l]) + anorm) == anorm || nm < 0) { - flag = 0; - break; - } - if ((double)(fabs(w[nm]) + anorm) == anorm) break; - } - if (flag) { - c = 0.0; - s = 1.0; - for (i = l; i <= k; i++) { - f = s * rv1[i]; - rv1[i] = c * rv1[i]; - if ((double)(fabs(f) + anorm) == anorm) break; - g = w[i]; - h = pythag(f, g); - w[i] = h; - h = 1.0 / h; - c = g * h; - s = -f * h; - for (j = 0; j < m; j++) { - y = u[j][nm]; - z = u[j][i]; - u[j][nm] = y * c + z * s; - u[j][i] = z * c - y * s; - } - } - } - z = w[k]; - if (l == k) { - if (z < 0.0) { - w[k] = -z; - for (j = 0; j < n; j++) v[j][k] = -v[j][k]; - } - break; - } - if (its == max_its - 1) { - aom_free(rv1); - return 1; - } - assert(k > 0); - x = w[l]; - nm = k - 1; - y = w[nm]; - g = rv1[nm]; - h = rv1[k]; - f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y); - g = pythag(f, 1.0); - f = ((x - z) * (x + z) + h * ((y / (f + sign(g, f))) - h)) / x; - c = s = 1.0; - for (j = l; j <= nm; j++) { - i = j + 1; - g = rv1[i]; - y = w[i]; - h = s * g; - g = c * g; - z = pythag(f, h); - rv1[j] = z; - c = f / z; - s = h / z; - f = x * c + g * s; - g = g * c - x * s; - h = y * s; - y *= c; - for (jj = 0; jj < n; jj++) { - x = v[jj][j]; - z = v[jj][i]; - v[jj][j] = x * c + z * s; - v[jj][i] = z * c - x * s; - } - z = pythag(f, h); - w[j] = z; - if (z != 0.) { - z = 1.0 / z; - c = f * z; - s = h * z; - } - f = c * g + s * y; - x = c * y - s * g; - for (jj = 0; jj < m; jj++) { - y = u[jj][j]; - z = u[jj][i]; - u[jj][j] = y * c + z * s; - u[jj][i] = z * c - y * s; - } - } - rv1[l] = 0.0; - rv1[k] = f; - w[k] = x; - } - } - aom_free(rv1); - return 0; -} - -static int SVD(double *U, double *W, double *V, double *matx, int M, int N) { - // Assumes allocation for U is MxN - double **nrU = (double **)aom_malloc((M) * sizeof(*nrU)); - double **nrV = (double **)aom_malloc((N) * sizeof(*nrV)); - int problem, i; - - problem = !(nrU && nrV); - if (!problem) { - for (i = 0; i < M; i++) { - nrU[i] = &U[i * N]; - } - for (i = 0; i < N; i++) { - nrV[i] = &V[i * N]; - } - } else { - if (nrU) aom_free(nrU); - if (nrV) aom_free(nrV); - return 1; - } - - /* copy from given matx into nrU */ - for (i = 0; i < M; i++) { - memcpy(&(nrU[i][0]), matx + N * i, N * sizeof(*matx)); - } - - /* HERE IT IS: do SVD */ - if (svdcmp(nrU, M, N, W, nrV)) { - aom_free(nrU); - aom_free(nrV); - return 1; - } - - /* aom_free Numerical Recipes arrays */ - aom_free(nrU); - aom_free(nrV); - - return 0; -} - -int pseudo_inverse(double *inv, double *matx, const int M, const int N) { - double ans; - int i, j, k; - double *const U = (double *)aom_malloc(M * N * sizeof(*matx)); - double *const W = (double *)aom_malloc(N * sizeof(*matx)); - double *const V = (double *)aom_malloc(N * N * sizeof(*matx)); - - if (!(U && W && V)) { - return 1; - } - if (SVD(U, W, V, matx, M, N)) { - aom_free(U); - aom_free(W); - aom_free(V); - return 1; - } - for (i = 0; i < N; i++) { - if (fabs(W[i]) < TINY_NEAR_ZERO) { - aom_free(U); - aom_free(W); - aom_free(V); - return 1; - } - } - - for (i = 0; i < N; i++) { - for (j = 0; j < M; j++) { - ans = 0; - for (k = 0; k < N; k++) { - ans += V[k 
+ N * i] * U[k + N * j] / W[k]; - } - inv[j + M * i] = ans; - } - } - aom_free(U); - aom_free(W); - aom_free(V); - return 0; -} - static void normalize_homography(double *pts, int n, double *T) { double *p = pts; double mean[2] = { 0, 0 }; @@ -597,7 +294,7 @@ static int find_translation(int np, double *pts1, double *pts2, double *mat) { static int find_rotzoom(int np, double *pts1, double *pts2, double *mat) { const int np2 = np * 2; - double *a = (double *)aom_malloc(sizeof(*a) * np2 * 9); + double *a = (double *)aom_malloc(sizeof(*a) * (np2 * 5 + 20)); double *b = a + np2 * 4; double *temp = b + np2; int i; @@ -625,11 +322,10 @@ static int find_rotzoom(int np, double *pts1, double *pts2, double *mat) { b[2 * i] = dx; b[2 * i + 1] = dy; } - if (pseudo_inverse(temp, a, np2, 4)) { + if (!least_squares(4, a, np2, 4, b, temp, mat)) { aom_free(a); return 1; } - multiply_mat(temp, b, mat, 4, np2, 1); denormalize_rotzoom_reorder(mat, T1, T2); aom_free(a); return 0; @@ -637,7 +333,7 @@ static int find_rotzoom(int np, double *pts1, double *pts2, double *mat) { static int find_affine(int np, double *pts1, double *pts2, double *mat) { const int np2 = np * 2; - double *a = (double *)aom_malloc(sizeof(*a) * np2 * 13); + double *a = (double *)aom_malloc(sizeof(*a) * (np2 * 7 + 42)); double *b = a + np2 * 6; double *temp = b + np2; int i; @@ -669,11 +365,10 @@ static int find_affine(int np, double *pts1, double *pts2, double *mat) { b[2 * i] = dx; b[2 * i + 1] = dy; } - if (pseudo_inverse(temp, a, np2, 6)) { + if (!least_squares(6, a, np2, 6, b, temp, mat)) { aom_free(a); return 1; } - multiply_mat(temp, b, mat, 6, np2, 1); denormalize_affine_reorder(mat, T1, T2); aom_free(a); return 0; @@ -890,16 +585,22 @@ static int find_homography(int np, double *pts1, double *pts2, double *mat) { return 0; } +// Generate a random number in the range [0, 32768). +static unsigned int lcg_rand16(unsigned int *state) { + *state = (unsigned int)(*state * 1103515245ULL + 12345); + return *state / 65536 % 32768; +} + static int get_rand_indices(int npoints, int minpts, int *indices, unsigned int *seed) { int i, j; - int ptr = rand_r(seed) % npoints; + int ptr = lcg_rand16(seed) % npoints; if (minpts > npoints) return 0; indices[0] = ptr; ptr = (ptr == npoints - 1 ? 0 : ptr + 1); i = 1; while (i < minpts) { - int index = rand_r(seed) % npoints; + int index = lcg_rand16(seed) % npoints; while (index) { ptr = (ptr == npoints - 1 ? 
0 : ptr + 1); for (j = 0; j < i; ++j) { @@ -986,6 +687,9 @@ static int ransac(const int *matched_points, int npoints, double *cnp1, *cnp2; + for (i = 0; i < num_desired_motions; ++i) { + num_inliers_by_motion[i] = 0; + } if (npoints < minpts * MINPTS_MULTIPLIER || npoints == 0) { return 1; } @@ -1072,7 +776,7 @@ static int ransac(const int *matched_points, int npoints, if (current_motion.num_inliers >= worst_kept_motion->num_inliers && current_motion.num_inliers > 1) { int temp; - double fracinliers, pNoOutliers, mean_distance; + double fracinliers, pNoOutliers, mean_distance, dtemp; mean_distance = sum_distance / ((double)current_motion.num_inliers); current_motion.variance = sum_distance_squared / ((double)current_motion.num_inliers - 1.0) - @@ -1092,7 +796,10 @@ static int ransac(const int *matched_points, int npoints, pNoOutliers = 1 - pow(fracinliers, minpts); pNoOutliers = fmax(EPS, pNoOutliers); pNoOutliers = fmin(1 - EPS, pNoOutliers); - temp = (int)(log(1.0 - PROBABILITY_REQUIRED) / log(pNoOutliers)); + dtemp = log(1.0 - PROBABILITY_REQUIRED) / log(pNoOutliers); + temp = (dtemp > (double)INT32_MAX) + ? INT32_MAX + : dtemp < (double)INT32_MIN ? INT32_MIN : (int)dtemp; if (temp > 0 && temp < N) { N = AOMMAX(temp, MIN_TRIALS); diff --git a/third_party/aom/av1/encoder/ratectrl.c b/third_party/aom/av1/encoder/ratectrl.c index 1f2ea3606..4552c674e 100644 --- a/third_party/aom/av1/encoder/ratectrl.c +++ b/third_party/aom/av1/encoder/ratectrl.c @@ -93,6 +93,11 @@ static int gf_low = 400; static int kf_high = 5000; static int kf_low = 400; +double av1_resize_rate_factor(const AV1_COMP *cpi) { + return (double)(cpi->resize_scale_den * cpi->resize_scale_den) / + (cpi->resize_scale_num * cpi->resize_scale_num); +} + // Functions to compute the active minq lookup table entries based on a // formulaic approach to facilitate easier adjustment of the Q tables. // The formulae were derived from computing a 3rd order polynomial best @@ -384,7 +389,7 @@ static double get_rate_correction_factor(const AV1_COMP *cpi) { else rcf = rc->rate_correction_factors[INTER_NORMAL]; } - rcf *= rcf_mult[rc->frame_size_selector]; + rcf *= av1_resize_rate_factor(cpi); return fclamp(rcf, MIN_BPB_FACTOR, MAX_BPB_FACTOR); } @@ -392,7 +397,7 @@ static void set_rate_correction_factor(AV1_COMP *cpi, double factor) { RATE_CONTROL *const rc = &cpi->rc; // Normalize RCF to account for the size-dependent scaling factor. - factor /= rcf_mult[cpi->rc.frame_size_selector]; + factor /= av1_resize_rate_factor(cpi); factor = fclamp(factor, MIN_BPB_FACTOR, MAX_BPB_FACTOR); @@ -1076,7 +1081,7 @@ static int rc_pick_q_and_bounds_two_pass(const AV1_COMP *cpi, int *bottom_index, } // Modify active_best_quality for downscaled normal frames. - if (rc->frame_size_selector != UNSCALED && !frame_is_kf_gf_arf(cpi)) { + if (!av1_resize_unscaled(cpi) && !frame_is_kf_gf_arf(cpi)) { int qdelta = av1_compute_qdelta_by_rate( rc, cm->frame_type, active_best_quality, 2.0, cm->bit_depth); active_best_quality = @@ -1158,11 +1163,10 @@ void av1_rc_set_frame_target(AV1_COMP *cpi, int target) { rc->this_frame_target = target; - // Modify frame size target when down-scaling. - if (cpi->oxcf.resize_mode == RESIZE_DYNAMIC && - rc->frame_size_selector != UNSCALED) - rc->this_frame_target = (int)(rc->this_frame_target * - rate_thresh_mult[rc->frame_size_selector]); + // Modify frame size target when down-scaled. 
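As a quick sanity check on the av1_resize_rate_factor() replacement introduced above, here is the same arithmetic as a stand-alone sketch with hypothetical scale values (not part of the patch). The function returns how many times fewer pixels the scaled frame contains, which is what the rate-correction and frame-target code in these hunks multiplies by:

/* Same formula as av1_resize_rate_factor(), taking num/den as the
 * scaled-to-source linear ratio. */
static double resize_rate_factor(int num, int den) {
  return (double)(den * den) / (num * num);
}
/* Hypothetical example: half width and half height (num = 1, den = 2)
 * gives 4.0, i.e. a quarter of the pixels to encode. */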
+ if (cpi->oxcf.resize_mode == RESIZE_DYNAMIC && !av1_resize_unscaled(cpi)) + rc->this_frame_target = + (int)(rc->this_frame_target * av1_resize_rate_factor(cpi)); // Target rate per SB64 (including partial SB64s. rc->sb64_target_rate = (int)((int64_t)rc->this_frame_target * 64 * 64) / @@ -1225,7 +1229,6 @@ static void update_golden_frame_stats(AV1_COMP *cpi) { void av1_rc_postencode_update(AV1_COMP *cpi, uint64_t bytes_used) { const AV1_COMMON *const cm = &cpi->common; - const AV1EncoderConfig *const oxcf = &cpi->oxcf; RATE_CONTROL *const rc = &cpi->rc; const int qindex = cm->base_qindex; @@ -1317,13 +1320,6 @@ void av1_rc_postencode_update(AV1_COMP *cpi, uint64_t bytes_used) { rc->frames_since_key++; rc->frames_to_key--; } - - // Trigger the resizing of the next frame if it is scaled. - if (oxcf->pass != 0) { - cpi->resize_pending = - rc->next_frame_size_selector != rc->frame_size_selector; - rc->frame_size_selector = rc->next_frame_size_selector; - } } void av1_rc_postencode_update_drop_frame(AV1_COMP *cpi) { @@ -1501,10 +1497,7 @@ void av1_rc_get_one_pass_cbr_params(AV1_COMP *cpi) { target = calc_pframe_target_size_one_pass_cbr(cpi); av1_rc_set_frame_target(cpi, target); - if (cpi->oxcf.resize_mode == RESIZE_DYNAMIC) - cpi->resize_pending = av1_resize_one_pass_cbr(cpi); - else - cpi->resize_pending = 0; + // TODO(afergs): Decide whether to scale up, down, or not at all } int av1_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget, @@ -1670,90 +1663,3 @@ void av1_set_target_rate(AV1_COMP *cpi) { vbr_rate_correction(cpi, &target_rate); av1_rc_set_frame_target(cpi, target_rate); } - -// Check if we should resize, based on average QP from past x frames. -// Only allow for resize at most one scale down for now, scaling factor is 2. -int av1_resize_one_pass_cbr(AV1_COMP *cpi) { - const AV1_COMMON *const cm = &cpi->common; - RATE_CONTROL *const rc = &cpi->rc; - int resize_now = 0; - cpi->resize_scale_num = 1; - cpi->resize_scale_den = 1; - // Don't resize on key frame; reset the counters on key frame. - if (cm->frame_type == KEY_FRAME) { - cpi->resize_avg_qp = 0; - cpi->resize_count = 0; - return 0; - } - // Resize based on average buffer underflow and QP over some window. - // Ignore samples close to key frame, since QP is usually high after key. - if (cpi->rc.frames_since_key > 2 * cpi->framerate) { - const int window = (int)(5 * cpi->framerate); - cpi->resize_avg_qp += cm->base_qindex; - if (cpi->rc.buffer_level < (int)(30 * rc->optimal_buffer_level / 100)) - ++cpi->resize_buffer_underflow; - ++cpi->resize_count; - // Check for resize action every "window" frames. - if (cpi->resize_count >= window) { - int avg_qp = cpi->resize_avg_qp / cpi->resize_count; - // Resize down if buffer level has underflowed sufficent amount in past - // window, and we are at original resolution. - // Resize back up if average QP is low, and we are currently in a resized - // down state. - if (cpi->resize_state == 0 && - cpi->resize_buffer_underflow > (cpi->resize_count >> 2)) { - resize_now = 1; - cpi->resize_state = 1; - } else if (cpi->resize_state == 1 && - avg_qp < 40 * cpi->rc.worst_quality / 100) { - resize_now = -1; - cpi->resize_state = 0; - } - // Reset for next window measurement. 
- cpi->resize_avg_qp = 0; - cpi->resize_count = 0; - cpi->resize_buffer_underflow = 0; - } - } - // If decision is to resize, reset some quantities, and check is we should - // reduce rate correction factor, - if (resize_now != 0) { - int target_bits_per_frame; - int active_worst_quality; - int qindex; - int tot_scale_change; - // For now, resize is by 1/2 x 1/2. - cpi->resize_scale_num = 1; - cpi->resize_scale_den = 2; - tot_scale_change = (cpi->resize_scale_den * cpi->resize_scale_den) / - (cpi->resize_scale_num * cpi->resize_scale_num); - // Reset buffer level to optimal, update target size. - rc->buffer_level = rc->optimal_buffer_level; - rc->bits_off_target = rc->optimal_buffer_level; - rc->this_frame_target = calc_pframe_target_size_one_pass_cbr(cpi); - // Reset cyclic refresh parameters. - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) - av1_cyclic_refresh_reset_resize(cpi); - // Get the projected qindex, based on the scaled target frame size (scaled - // so target_bits_per_mb in av1_rc_regulate_q will be correct target). - target_bits_per_frame = (resize_now == 1) - ? rc->this_frame_target * tot_scale_change - : rc->this_frame_target / tot_scale_change; - active_worst_quality = calc_active_worst_quality_one_pass_cbr(cpi); - qindex = av1_rc_regulate_q(cpi, target_bits_per_frame, rc->best_quality, - active_worst_quality); - // If resize is down, check if projected q index is close to worst_quality, - // and if so, reduce the rate correction factor (since likely can afford - // lower q for resized frame). - if (resize_now == 1 && qindex > 90 * cpi->rc.worst_quality / 100) { - rc->rate_correction_factors[INTER_NORMAL] *= 0.85; - } - // If resize is back up, check if projected q index is too much above the - // current base_qindex, and if so, reduce the rate correction factor - // (since prefer to keep q for resized frame at least close to previous q). - if (resize_now == -1 && qindex > 130 * cm->base_qindex / 100) { - rc->rate_correction_factors[INTER_NORMAL] *= 0.9; - } - } - return resize_now; -} diff --git a/third_party/aom/av1/encoder/ratectrl.h b/third_party/aom/av1/encoder/ratectrl.h index 93a9b4939..61bb0c224 100644 --- a/third_party/aom/av1/encoder/ratectrl.h +++ b/third_party/aom/av1/encoder/ratectrl.h @@ -49,27 +49,6 @@ typedef enum { } RATE_FACTOR_LEVEL; #endif // CONFIG_EXT_REFS -// Internal frame scaling level. -typedef enum { - UNSCALED = 0, // Frame is unscaled. - SCALE_STEP1 = 1, // First-level down-scaling. - FRAME_SCALE_STEPS -} FRAME_SCALE_LEVEL; - -// Frame dimensions multiplier wrt the native frame size, in 1/16ths, -// specified for the scale-up case. -// e.g. 24 => 16/24 = 2/3 of native size. The restriction to 1/16th is -// intended to match the capabilities of the normative scaling filters, -// giving precedence to the up-scaling accuracy. -static const int frame_scale_factor[FRAME_SCALE_STEPS] = { 16, 24 }; - -// Multiplier of the target rate to be used as threshold for triggering scaling. -static const double rate_thresh_mult[FRAME_SCALE_STEPS] = { 1.0, 2.0 }; - -// Scale dependent Rate Correction Factor multipliers. Compensates for the -// greater number of bits per pixel generated in down-scaled frames. -static const double rcf_mult[FRAME_SCALE_STEPS] = { 1.0, 2.0 }; - typedef struct { // Rate targetting variables int base_frame_target; // A baseline frame target before adjustment @@ -162,10 +141,6 @@ typedef struct { int q_2_frame; // Auto frame-scaling variables. 
- FRAME_SCALE_LEVEL frame_size_selector; - FRAME_SCALE_LEVEL next_frame_size_selector; - int frame_width[FRAME_SCALE_STEPS]; - int frame_height[FRAME_SCALE_STEPS]; int rf_level_maxq[RATE_FACTOR_LEVELS]; } RATE_CONTROL; @@ -214,6 +189,10 @@ int av1_rc_get_default_max_gf_interval(double framerate, int min_frame_rate); void av1_rc_get_one_pass_vbr_params(struct AV1_COMP *cpi); void av1_rc_get_one_pass_cbr_params(struct AV1_COMP *cpi); +// How many times less pixels there are to encode given the current scaling. +// Temporary replacement for rcf_mult and rate_thresh_mult. +double av1_resize_rate_factor(const struct AV1_COMP *cpi); + // Post encode update of the rate control parameters based // on bytes used void av1_rc_postencode_update(struct AV1_COMP *cpi, uint64_t bytes_used); diff --git a/third_party/aom/av1/encoder/rd.c b/third_party/aom/av1/encoder/rd.c index f06e569e7..94c3bb96d 100644 --- a/third_party/aom/av1/encoder/rd.c +++ b/third_party/aom/av1/encoder/rd.c @@ -330,7 +330,6 @@ static void set_block_thresholds(const AV1_COMMON *cm, RD_OPT *rd) { } } -#if CONFIG_REF_MV void av1_set_mvcost(MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame, int ref, int ref_mv_idx) { MB_MODE_INFO_EXT *mbmi_ext = x->mbmi_ext; @@ -340,19 +339,14 @@ void av1_set_mvcost(MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame, int ref, (void)ref_frame; x->mvcost = x->mv_cost_stack[nmv_ctx]; x->nmvjointcost = x->nmv_vec_cost[nmv_ctx]; - x->mvsadcost = x->mvcost; - x->nmvjointsadcost = x->nmvjointcost; } -#endif void av1_initialize_rd_consts(AV1_COMP *cpi) { AV1_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->td.mb; RD_OPT *const rd = &cpi->rd; int i; -#if CONFIG_REF_MV int nmv_ctx; -#endif aom_clear_system_state(); @@ -363,7 +357,6 @@ void av1_initialize_rd_consts(AV1_COMP *cpi) { set_block_thresholds(cm, rd); -#if CONFIG_REF_MV for (nmv_ctx = 0; nmv_ctx < NMV_CONTEXTS; ++nmv_ctx) { av1_build_nmv_cost_table( x->nmv_vec_cost[nmv_ctx], @@ -373,19 +366,11 @@ void av1_initialize_rd_consts(AV1_COMP *cpi) { } x->mvcost = x->mv_cost_stack[0]; x->nmvjointcost = x->nmv_vec_cost[0]; - x->mvsadcost = x->mvcost; - x->nmvjointsadcost = x->nmvjointcost; -#else - av1_build_nmv_cost_table( - x->nmvjointcost, cm->allow_high_precision_mv ? 
x->nmvcost_hp : x->nmvcost, - &cm->fc->nmvc, cm->allow_high_precision_mv); -#endif if (cpi->oxcf.pass != 1) { av1_fill_token_costs(x->token_costs, cm->fc->coef_probs); - if (cpi->sf.partition_search_type != VAR_BASED_PARTITION || - cm->frame_type == KEY_FRAME) { + if (cm->frame_type == KEY_FRAME) { #if CONFIG_EXT_PARTITION_TYPES for (i = 0; i < PARTITION_PLOFFSET; ++i) av1_cost_tokens(cpi->partition_cost[i], cm->fc->partition_prob[i], @@ -425,7 +410,6 @@ void av1_initialize_rd_consts(AV1_COMP *cpi) { fill_mode_costs(cpi); if (!frame_is_intra_only(cm)) { -#if CONFIG_REF_MV for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) { cpi->newmv_mode_cost[i][0] = av1_cost_bit(cm->fc->newmv_prob[i], 0); cpi->newmv_mode_cost[i][1] = av1_cost_bit(cm->fc->newmv_prob[i], 1); @@ -445,20 +429,17 @@ void av1_initialize_rd_consts(AV1_COMP *cpi) { cpi->drl_mode_cost0[i][0] = av1_cost_bit(cm->fc->drl_prob[i], 0); cpi->drl_mode_cost0[i][1] = av1_cost_bit(cm->fc->drl_prob[i], 1); } -#else - for (i = 0; i < INTER_MODE_CONTEXTS; ++i) - av1_cost_tokens((int *)cpi->inter_mode_cost[i], - cm->fc->inter_mode_probs[i], av1_inter_mode_tree); -#endif // CONFIG_REF_MV #if CONFIG_EXT_INTER for (i = 0; i < INTER_MODE_CONTEXTS; ++i) av1_cost_tokens((int *)cpi->inter_compound_mode_cost[i], cm->fc->inter_compound_mode_probs[i], av1_inter_compound_mode_tree); +#if CONFIG_INTERINTRA for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) av1_cost_tokens((int *)cpi->interintra_mode_cost[i], cm->fc->interintra_mode_prob[i], av1_interintra_mode_tree); +#endif // CONFIG_INTERINTRA #endif // CONFIG_EXT_INTER #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION for (i = BLOCK_8X8; i < BLOCK_SIZES; i++) { @@ -575,9 +556,15 @@ static void get_entropy_contexts_plane( const ENTROPY_CONTEXT *const above = pd->above_context; const ENTROPY_CONTEXT *const left = pd->left_context; +#if CONFIG_LV_MAP + memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w); + memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h); + return; +#endif // CONFIG_LV_MAP + int i; -#if CONFIG_CB4X4 +#if CONFIG_CHROMA_2X2 switch (tx_size) { case TX_2X2: memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w); @@ -609,6 +596,20 @@ static void get_entropy_contexts_plane( t_left[i] = !!(*(const uint64_t *)&left[i] | *(const uint64_t *)&left[i + 8]); break; +#if CONFIG_TX64X64 + case TX_64X64: + for (i = 0; i < num_4x4_w; i += 32) + t_above[i] = + !!(*(const uint64_t *)&above[i] | *(const uint64_t *)&above[i + 8] | + *(const uint64_t *)&above[i + 16] | + *(const uint64_t *)&above[i + 24]); + for (i = 0; i < num_4x4_h; i += 32) + t_left[i] = + !!(*(const uint64_t *)&left[i] | *(const uint64_t *)&left[i + 8] | + *(const uint64_t *)&left[i + 16] | + *(const uint64_t *)&left[i + 24]); + break; +#endif // CONFIG_TX64X64 case TX_4X8: for (i = 0; i < num_4x4_w; i += 2) t_above[i] = !!*(const uint16_t *)&above[i]; @@ -647,11 +648,39 @@ static void get_entropy_contexts_plane( for (i = 0; i < num_4x4_h; i += 8) t_left[i] = !!*(const uint64_t *)&left[i]; break; +#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT + case TX_4X16: + for (i = 0; i < num_4x4_w; i += 2) + t_above[i] = !!*(const uint16_t *)&above[i]; + for (i = 0; i < num_4x4_h; i += 8) + t_left[i] = !!*(const uint64_t *)&left[i]; + break; + case TX_16X4: + for (i = 0; i < num_4x4_w; i += 8) + t_above[i] = !!*(const uint64_t *)&above[i]; + for (i = 0; i < num_4x4_h; i += 2) + t_left[i] = !!*(const uint16_t *)&left[i]; + break; + case TX_8X32: + for (i = 0; i < num_4x4_w; i += 4) + t_above[i] = !!*(const uint32_t *)&above[i]; + for (i = 0; 
i < num_4x4_h; i += 16) + t_left[i] = + !!(*(const uint64_t *)&left[i] | *(const uint64_t *)&left[i + 8]); + break; + case TX_32X8: + for (i = 0; i < num_4x4_w; i += 16) + t_above[i] = + !!(*(const uint64_t *)&above[i] | *(const uint64_t *)&above[i + 8]); + for (i = 0; i < num_4x4_h; i += 4) + t_left[i] = !!*(const uint32_t *)&left[i]; + break; +#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT default: assert(0 && "Invalid transform size."); break; } return; -#endif +#endif // CONFIG_CHROMA_2X2 switch (tx_size) { case TX_4X4: @@ -720,6 +749,30 @@ static void get_entropy_contexts_plane( for (i = 0; i < num_4x4_h; i += 4) t_left[i] = !!*(const uint32_t *)&left[i]; break; +#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT + case TX_4X16: + memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w); + for (i = 0; i < num_4x4_h; i += 4) + t_left[i] = !!*(const uint32_t *)&left[i]; + break; + case TX_16X4: + for (i = 0; i < num_4x4_w; i += 4) + t_above[i] = !!*(const uint32_t *)&above[i]; + memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h); + break; + case TX_8X32: + for (i = 0; i < num_4x4_w; i += 2) + t_above[i] = !!*(const uint16_t *)&above[i]; + for (i = 0; i < num_4x4_h; i += 8) + t_left[i] = !!*(const uint64_t *)&left[i]; + break; + case TX_32X8: + for (i = 0; i < num_4x4_w; i += 8) + t_above[i] = !!*(const uint64_t *)&above[i]; + for (i = 0; i < num_4x4_h; i += 2) + t_left[i] = !!*(const uint16_t *)&left[i]; + break; +#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT default: assert(0 && "Invalid transform size."); break; } } @@ -728,7 +781,12 @@ void av1_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size, const struct macroblockd_plane *pd, ENTROPY_CONTEXT t_above[2 * MAX_MIB_SIZE], ENTROPY_CONTEXT t_left[2 * MAX_MIB_SIZE]) { +#if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2 + const BLOCK_SIZE plane_bsize = + AOMMAX(BLOCK_4X4, get_plane_block_size(bsize, pd)); +#else const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); +#endif get_entropy_contexts_plane(plane_bsize, tx_size, pd, t_above, t_left); } @@ -740,27 +798,25 @@ void av1_mv_pred(const AV1_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer, int best_sad = INT_MAX; int this_sad = INT_MAX; int max_mv = 0; - int near_same_nearest; uint8_t *src_y_ptr = x->plane[0].src.buf; uint8_t *ref_y_ptr; - const int num_mv_refs = - MAX_MV_REF_CANDIDATES + - (cpi->sf.adaptive_motion_search && block_size < x->max_partition_size); - - MV pred_mv[3]; - pred_mv[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv; - pred_mv[1] = x->mbmi_ext->ref_mvs[ref_frame][1].as_mv; - pred_mv[2] = x->pred_mv[ref_frame]; + MV pred_mv[MAX_MV_REF_CANDIDATES + 1]; + int num_mv_refs = 0; + + pred_mv[num_mv_refs++] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv; + if (x->mbmi_ext->ref_mvs[ref_frame][0].as_int != + x->mbmi_ext->ref_mvs[ref_frame][1].as_int) { + pred_mv[num_mv_refs++] = x->mbmi_ext->ref_mvs[ref_frame][1].as_mv; + } + if (cpi->sf.adaptive_motion_search && block_size < x->max_partition_size) + pred_mv[num_mv_refs++] = x->pred_mv[ref_frame]; + assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0]))); - near_same_nearest = x->mbmi_ext->ref_mvs[ref_frame][0].as_int == - x->mbmi_ext->ref_mvs[ref_frame][1].as_int; // Get the sad for each candidate reference mv. 
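The candidate loop below converts each 1/8-pel MV component to a full-pel offset with (v + 3 + (v >= 0)) >> 3 before taking the SAD. A stand-alone sketch of that rounding step (the helper name is illustrative, and it assumes the usual arithmetic right shift for negative values, as the encoder does):

/* Round a 1/8-pel MV component to the nearest full-pel offset,
 * with ties rounding away from zero. */
static int round_q3_to_fullpel(int v) {
  return (v + 3 + (v >= 0)) >> 3;
}
/* Examples: 13 (1.625 pel) -> 2, -13 -> -2, 4 (0.5 pel) -> 1, -4 -> -1. */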
for (i = 0; i < num_mv_refs; ++i) { const MV *this_mv = &pred_mv[i]; int fp_row, fp_col; - - if (i == 1 && near_same_nearest) continue; fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3; fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3; max_mv = AOMMAX(max_mv, AOMMAX(abs(this_mv->row), abs(this_mv->col)) >> 3); @@ -959,8 +1015,6 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) { #if CONFIG_EXT_INTER - rd->thresh_mult[THR_COMP_NEAREST_NEARLA] += 1200; - rd->thresh_mult[THR_COMP_NEAR_NEARESTLA] += 1200; rd->thresh_mult[THR_COMP_NEAR_NEARLA] += 1200; rd->thresh_mult[THR_COMP_NEAREST_NEWLA] += 1500; rd->thresh_mult[THR_COMP_NEW_NEARESTLA] += 1500; @@ -970,8 +1024,6 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) { rd->thresh_mult[THR_COMP_ZERO_ZEROLA] += 2500; #if CONFIG_EXT_REFS - rd->thresh_mult[THR_COMP_NEAREST_NEARL2A] += 1200; - rd->thresh_mult[THR_COMP_NEAR_NEARESTL2A] += 1200; rd->thresh_mult[THR_COMP_NEAR_NEARL2A] += 1200; rd->thresh_mult[THR_COMP_NEAREST_NEWL2A] += 1500; rd->thresh_mult[THR_COMP_NEW_NEARESTL2A] += 1500; @@ -980,8 +1032,6 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) { rd->thresh_mult[THR_COMP_NEW_NEWL2A] += 2000; rd->thresh_mult[THR_COMP_ZERO_ZEROL2A] += 2500; - rd->thresh_mult[THR_COMP_NEAREST_NEARL3A] += 1200; - rd->thresh_mult[THR_COMP_NEAR_NEARESTL3A] += 1200; rd->thresh_mult[THR_COMP_NEAR_NEARL3A] += 1200; rd->thresh_mult[THR_COMP_NEAREST_NEWL3A] += 1500; rd->thresh_mult[THR_COMP_NEW_NEARESTL3A] += 1500; @@ -991,8 +1041,6 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) { rd->thresh_mult[THR_COMP_ZERO_ZEROL3A] += 2500; #endif // CONFIG_EXT_REFS - rd->thresh_mult[THR_COMP_NEAREST_NEARGA] += 1200; - rd->thresh_mult[THR_COMP_NEAR_NEARESTGA] += 1200; rd->thresh_mult[THR_COMP_NEAR_NEARGA] += 1200; rd->thresh_mult[THR_COMP_NEAREST_NEWGA] += 1500; rd->thresh_mult[THR_COMP_NEW_NEARESTGA] += 1500; @@ -1002,8 +1050,6 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) { rd->thresh_mult[THR_COMP_ZERO_ZEROGA] += 2500; #if CONFIG_EXT_REFS - rd->thresh_mult[THR_COMP_NEAREST_NEARLB] += 1200; - rd->thresh_mult[THR_COMP_NEAR_NEARESTLB] += 1200; rd->thresh_mult[THR_COMP_NEAR_NEARLB] += 1200; rd->thresh_mult[THR_COMP_NEAREST_NEWLB] += 1500; rd->thresh_mult[THR_COMP_NEW_NEARESTLB] += 1500; @@ -1012,8 +1058,6 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) { rd->thresh_mult[THR_COMP_NEW_NEWLB] += 2000; rd->thresh_mult[THR_COMP_ZERO_ZEROLB] += 2500; - rd->thresh_mult[THR_COMP_NEAREST_NEARL2B] += 1200; - rd->thresh_mult[THR_COMP_NEAR_NEARESTL2B] += 1200; rd->thresh_mult[THR_COMP_NEAR_NEARL2B] += 1200; rd->thresh_mult[THR_COMP_NEAREST_NEWL2B] += 1500; rd->thresh_mult[THR_COMP_NEW_NEARESTL2B] += 1500; @@ -1022,8 +1066,6 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) { rd->thresh_mult[THR_COMP_NEW_NEWL2B] += 2000; rd->thresh_mult[THR_COMP_ZERO_ZEROL2B] += 2500; - rd->thresh_mult[THR_COMP_NEAREST_NEARL3B] += 1200; - rd->thresh_mult[THR_COMP_NEAR_NEARESTL3B] += 1200; rd->thresh_mult[THR_COMP_NEAR_NEARL3B] += 1200; rd->thresh_mult[THR_COMP_NEAREST_NEWL3B] += 1500; rd->thresh_mult[THR_COMP_NEW_NEARESTL3B] += 1500; @@ -1032,8 +1074,6 @@ void av1_set_rd_speed_thresholds(AV1_COMP *cpi) { rd->thresh_mult[THR_COMP_NEW_NEWL3B] += 2000; rd->thresh_mult[THR_COMP_ZERO_ZEROL3B] += 2500; - rd->thresh_mult[THR_COMP_NEAREST_NEARGB] += 1200; - rd->thresh_mult[THR_COMP_NEAR_NEARESTGB] += 1200; rd->thresh_mult[THR_COMP_NEAR_NEARGB] += 1200; rd->thresh_mult[THR_COMP_NEAREST_NEWGB] += 1500; rd->thresh_mult[THR_COMP_NEW_NEARESTGB] += 1500; diff --git 
a/third_party/aom/av1/encoder/rd.h b/third_party/aom/av1/encoder/rd.h index c0ac1f7e7..5c3eee493 100644 --- a/third_party/aom/av1/encoder/rd.h +++ b/third_party/aom/av1/encoder/rd.h @@ -130,6 +130,10 @@ typedef enum { #if CONFIG_ALT_INTRA THR_SMOOTH, +#if CONFIG_SMOOTH_HV + THR_SMOOTH_V, + THR_SMOOTH_H, +#endif // CONFIG_SMOOTH_HV #endif // CONFIG_ALT_INTRA #if CONFIG_EXT_INTER @@ -357,6 +361,9 @@ static INLINE void av1_init_rd_stats(RD_STATS *rd_stats) { rd_stats->rdcost = 0; rd_stats->sse = 0; rd_stats->skip = 1; +#if CONFIG_DAALA_DIST && CONFIG_CB4X4 + rd_stats->dist_y = 0; +#endif #if CONFIG_RD_DEBUG for (plane = 0; plane < MAX_MB_PLANE; ++plane) { rd_stats->txb_coeff_cost[plane] = 0; @@ -381,6 +388,9 @@ static INLINE void av1_invalid_rd_stats(RD_STATS *rd_stats) { rd_stats->rdcost = INT64_MAX; rd_stats->sse = INT64_MAX; rd_stats->skip = 0; +#if CONFIG_DAALA_DIST && CONFIG_CB4X4 + rd_stats->dist_y = INT64_MAX; +#endif #if CONFIG_RD_DEBUG for (plane = 0; plane < MAX_MB_PLANE; ++plane) { rd_stats->txb_coeff_cost[plane] = INT_MAX; @@ -405,6 +415,9 @@ static INLINE void av1_merge_rd_stats(RD_STATS *rd_stats_dst, rd_stats_dst->dist += rd_stats_src->dist; rd_stats_dst->sse += rd_stats_src->sse; rd_stats_dst->skip &= rd_stats_src->skip; +#if CONFIG_DAALA_DIST && CONFIG_CB4X4 + rd_stats_dst->dist_y += rd_stats_src->dist_y; +#endif #if CONFIG_RD_DEBUG for (plane = 0; plane < MAX_MB_PLANE; ++plane) { rd_stats_dst->txb_coeff_cost[plane] += rd_stats_src->txb_coeff_cost[plane]; @@ -454,10 +467,8 @@ YV12_BUFFER_CONFIG *av1_get_scaled_ref_frame(const struct AV1_COMP *cpi, void av1_init_me_luts(void); -#if CONFIG_REF_MV void av1_set_mvcost(MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame, int ref, int ref_mv_idx); -#endif void av1_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size, const struct macroblockd_plane *pd, diff --git a/third_party/aom/av1/encoder/rdopt.c b/third_party/aom/av1/encoder/rdopt.c index a1096f782..2a537a06a 100644 --- a/third_party/aom/av1/encoder/rdopt.c +++ b/third_party/aom/av1/encoder/rdopt.c @@ -66,11 +66,18 @@ #endif // CONFIG_PVQ || CONFIG_DAALA_DIST #if CONFIG_DUAL_FILTER #define DUAL_FILTER_SET_SIZE (SWITCHABLE_FILTERS * SWITCHABLE_FILTERS) +#if USE_EXTRA_FILTER static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = { { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 }, { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 }, { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 }, }; +#else // USE_EXTRA_FILTER +static const int filter_sets[DUAL_FILTER_SET_SIZE][2] = { + { 0, 0 }, { 0, 1 }, { 0, 2 }, { 1, 0 }, { 1, 1 }, + { 1, 2 }, { 2, 0 }, { 2, 1 }, { 2, 2 }, +}; +#endif // USE_EXTRA_FILTER #endif // CONFIG_DUAL_FILTER #if CONFIG_EXT_REFS @@ -217,11 +224,13 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = { #if CONFIG_ALT_INTRA { SMOOTH_PRED, { INTRA_FRAME, NONE_FRAME } }, +#if CONFIG_SMOOTH_HV + { SMOOTH_V_PRED, { INTRA_FRAME, NONE_FRAME } }, + { SMOOTH_H_PRED, { INTRA_FRAME, NONE_FRAME } }, +#endif // CONFIG_SMOOTH_HV #endif // CONFIG_ALT_INTRA #if CONFIG_EXT_INTER - { NEAR_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } }, - { NEAREST_NEARMV, { LAST_FRAME, ALTREF_FRAME } }, { NEAR_NEARMV, { LAST_FRAME, ALTREF_FRAME } }, { NEW_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } }, { NEAREST_NEWMV, { LAST_FRAME, ALTREF_FRAME } }, @@ -231,8 +240,6 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = { { ZERO_ZEROMV, { LAST_FRAME, ALTREF_FRAME } }, #if CONFIG_EXT_REFS - { NEAR_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } }, - { NEAREST_NEARMV, { LAST2_FRAME, ALTREF_FRAME } }, { 
NEAR_NEARMV, { LAST2_FRAME, ALTREF_FRAME } }, { NEW_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } }, { NEAREST_NEWMV, { LAST2_FRAME, ALTREF_FRAME } }, @@ -241,8 +248,6 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = { { NEW_NEWMV, { LAST2_FRAME, ALTREF_FRAME } }, { ZERO_ZEROMV, { LAST2_FRAME, ALTREF_FRAME } }, - { NEAR_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } }, - { NEAREST_NEARMV, { LAST3_FRAME, ALTREF_FRAME } }, { NEAR_NEARMV, { LAST3_FRAME, ALTREF_FRAME } }, { NEW_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } }, { NEAREST_NEWMV, { LAST3_FRAME, ALTREF_FRAME } }, @@ -252,8 +257,6 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = { { ZERO_ZEROMV, { LAST3_FRAME, ALTREF_FRAME } }, #endif // CONFIG_EXT_REFS - { NEAR_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } }, - { NEAREST_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } }, { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } }, { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } }, { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } }, @@ -263,8 +266,6 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = { { ZERO_ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } }, #if CONFIG_EXT_REFS - { NEAR_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } }, - { NEAREST_NEARMV, { LAST_FRAME, BWDREF_FRAME } }, { NEAR_NEARMV, { LAST_FRAME, BWDREF_FRAME } }, { NEW_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } }, { NEAREST_NEWMV, { LAST_FRAME, BWDREF_FRAME } }, @@ -273,8 +274,6 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = { { NEW_NEWMV, { LAST_FRAME, BWDREF_FRAME } }, { ZERO_ZEROMV, { LAST_FRAME, BWDREF_FRAME } }, - { NEAR_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } }, - { NEAREST_NEARMV, { LAST2_FRAME, BWDREF_FRAME } }, { NEAR_NEARMV, { LAST2_FRAME, BWDREF_FRAME } }, { NEW_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } }, { NEAREST_NEWMV, { LAST2_FRAME, BWDREF_FRAME } }, @@ -283,8 +282,6 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = { { NEW_NEWMV, { LAST2_FRAME, BWDREF_FRAME } }, { ZERO_ZEROMV, { LAST2_FRAME, BWDREF_FRAME } }, - { NEAR_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } }, - { NEAREST_NEARMV, { LAST3_FRAME, BWDREF_FRAME } }, { NEAR_NEARMV, { LAST3_FRAME, BWDREF_FRAME } }, { NEW_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } }, { NEAREST_NEWMV, { LAST3_FRAME, BWDREF_FRAME } }, @@ -293,8 +290,6 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = { { NEW_NEWMV, { LAST3_FRAME, BWDREF_FRAME } }, { ZERO_ZEROMV, { LAST3_FRAME, BWDREF_FRAME } }, - { NEAR_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } }, - { NEAREST_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } }, { NEAR_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } }, { NEW_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } }, { NEAREST_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } }, @@ -390,28 +385,6 @@ static const MODE_DEFINITION av1_mode_order[MAX_MODES] = { #endif // CONFIG_EXT_INTER }; -static const REF_DEFINITION av1_ref_order[MAX_REFS] = { - { { LAST_FRAME, NONE_FRAME } }, -#if CONFIG_EXT_REFS - { { LAST2_FRAME, NONE_FRAME } }, { { LAST3_FRAME, NONE_FRAME } }, - { { BWDREF_FRAME, NONE_FRAME } }, -#endif // CONFIG_EXT_REFS - { { GOLDEN_FRAME, NONE_FRAME } }, { { ALTREF_FRAME, NONE_FRAME } }, - - { { LAST_FRAME, ALTREF_FRAME } }, -#if CONFIG_EXT_REFS - { { LAST2_FRAME, ALTREF_FRAME } }, { { LAST3_FRAME, ALTREF_FRAME } }, -#endif // CONFIG_EXT_REFS - { { GOLDEN_FRAME, ALTREF_FRAME } }, - -#if CONFIG_EXT_REFS - { { LAST_FRAME, BWDREF_FRAME } }, { { LAST2_FRAME, BWDREF_FRAME } }, - { { LAST3_FRAME, BWDREF_FRAME } }, { { GOLDEN_FRAME, BWDREF_FRAME } }, -#endif // CONFIG_EXT_REFS - - { { INTRA_FRAME, NONE_FRAME } }, -}; - #if CONFIG_EXT_INTRA || 
CONFIG_FILTER_INTRA || CONFIG_PALETTE static INLINE int write_uniform_cost(int n, int v) { const int l = get_unsigned_bits(n); @@ -430,22 +403,6 @@ static INLINE int write_uniform_cost(int n, int v) { #define FAST_EXT_TX_CORR_MARGIN 0.5 #define FAST_EXT_TX_EDST_MARGIN 0.3 -static const TX_TYPE_1D vtx_tab[TX_TYPES] = { - DCT_1D, ADST_1D, DCT_1D, ADST_1D, -#if CONFIG_EXT_TX - FLIPADST_1D, DCT_1D, FLIPADST_1D, ADST_1D, FLIPADST_1D, IDTX_1D, - DCT_1D, IDTX_1D, ADST_1D, IDTX_1D, FLIPADST_1D, IDTX_1D, -#endif // CONFIG_EXT_TX -}; - -static const TX_TYPE_1D htx_tab[TX_TYPES] = { - DCT_1D, DCT_1D, ADST_1D, ADST_1D, -#if CONFIG_EXT_TX - DCT_1D, FLIPADST_1D, FLIPADST_1D, FLIPADST_1D, ADST_1D, IDTX_1D, - IDTX_1D, DCT_1D, IDTX_1D, ADST_1D, IDTX_1D, FLIPADST_1D, -#endif // CONFIG_EXT_TX -}; - #if CONFIG_DAALA_DIST static int od_compute_var_4x4(od_coeff *x, int stride) { int sum; @@ -603,10 +560,9 @@ static double od_compute_dist(int qm, int activity_masking, od_coeff *x, return sum; } -static int64_t av1_daala_dist(const uint8_t *src, int src_stride, - const uint8_t *dst, int dst_stride, int bsw, - int bsh, int qm, int use_activity_masking, - int qindex) { +int64_t av1_daala_dist(const uint8_t *src, int src_stride, const uint8_t *dst, + int dst_stride, int bsw, int bsh, int qm, + int use_activity_masking, int qindex) { int i, j; int64_t d; DECLARE_ALIGNED(16, od_coeff, orig[MAX_TX_SQUARE]); @@ -843,7 +799,7 @@ static int prune_one_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize, static int prune_tx_types(const AV1_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x, const MACROBLOCKD *const xd, int tx_set) { #if CONFIG_EXT_TX - const int *tx_set_1D = ext_tx_used_inter_1D[tx_set]; + const int *tx_set_1D = tx_set >= 0 ? ext_tx_used_inter_1D[tx_set] : NULL; #else const int tx_set_1D[TX_TYPES_1D] = { 0 }; #endif // CONFIG_EXT_TX @@ -1100,13 +1056,10 @@ static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane, int c, cost; const int16_t *scan = scan_order->scan; const int16_t *nb = scan_order->neighbors; -#if CONFIG_NEW_TOKENSET const int ref = is_inter_block(mbmi); aom_prob *blockz_probs = cm->fc->blockzero_probs[txsize_sqr_map[tx_size]][type][ref]; -#endif // CONFIG_NEW_TOKENSET - #if CONFIG_HIGHBITDEPTH const int cat6_bits = av1_get_cat6_extrabits_size(tx_size, xd->bd); #else @@ -1120,12 +1073,8 @@ static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane, (void)cm; if (eob == 0) { -#if CONFIG_NEW_TOKENSET // single eob token cost = av1_cost_bit(blockz_probs[pt], 0); -#else - cost = token_costs[0][0][pt][EOB_TOKEN]; -#endif // CONFIG_NEW_TOKENSET } else { if (use_fast_coef_costing) { int band_left = *band_count++; @@ -1134,11 +1083,7 @@ static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane, int v = qcoeff[0]; int16_t prev_t; cost = av1_get_token_cost(v, &prev_t, cat6_bits); -#if CONFIG_NEW_TOKENSET cost += (*token_costs)[!prev_t][pt][prev_t]; -#else - cost += (*token_costs)[0][pt][prev_t]; -#endif token_cache[0] = av1_pt_energy_class[prev_t]; ++token_costs; @@ -1150,11 +1095,7 @@ static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane, v = qcoeff[rc]; cost += av1_get_token_cost(v, &t, cat6_bits); -#if CONFIG_NEW_TOKENSET cost += (*token_costs)[!t][!prev_t][t]; -#else - cost += (*token_costs)[!prev_t][!prev_t][t]; -#endif prev_t = t; if (!--band_left) { band_left = *band_count++; @@ -1163,8 +1104,7 @@ static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane, } // eob token - if (band_left || CONFIG_NEW_TOKENSET) - cost 
+= (*token_costs)[0][!prev_t][EOB_TOKEN]; + cost += (*token_costs)[0][!prev_t][EOB_TOKEN]; } else { // !use_fast_coef_costing int band_left = *band_count++; @@ -1172,23 +1112,12 @@ static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane, // dc token int v = qcoeff[0]; int16_t tok; -#if !CONFIG_NEW_TOKENSET - unsigned int(*tok_cost_ptr)[COEFF_CONTEXTS][ENTROPY_TOKENS]; -#endif cost = av1_get_token_cost(v, &tok, cat6_bits); -#if CONFIG_NEW_TOKENSET cost += (*token_costs)[!tok][pt][tok]; -#else - cost += (*token_costs)[0][pt][tok]; -#endif token_cache[0] = av1_pt_energy_class[tok]; ++token_costs; -#if !CONFIG_NEW_TOKENSET - tok_cost_ptr = &((*token_costs)[!tok]); -#endif - // ac tokens for (c = 1; c < eob; c++) { const int rc = scan[c]; @@ -1196,26 +1125,17 @@ static int cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, int plane, v = qcoeff[rc]; cost += av1_get_token_cost(v, &tok, cat6_bits); pt = get_coef_context(nb, token_cache, c); -#if CONFIG_NEW_TOKENSET cost += (*token_costs)[!tok][pt][tok]; -#else - cost += (*tok_cost_ptr)[pt][tok]; -#endif token_cache[rc] = av1_pt_energy_class[tok]; if (!--band_left) { band_left = *band_count++; ++token_costs; } -#if !CONFIG_NEW_TOKENSET - tok_cost_ptr = &((*token_costs)[!tok]); -#endif } // eob token - if (band_left || CONFIG_NEW_TOKENSET) { - pt = get_coef_context(nb, token_cache, c); - cost += (*token_costs)[0][pt][EOB_TOKEN]; - } + pt = get_coef_context(nb, token_cache, c); + cost += (*token_costs)[0][pt][EOB_TOKEN]; } } @@ -1262,7 +1182,9 @@ static void get_txb_dimensions(const MACROBLOCKD *xd, int plane, BLOCK_SIZE plane_bsize, int blk_row, int blk_col, BLOCK_SIZE tx_bsize, int *width, int *height, int *visible_width, int *visible_height) { +#if !(CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT) assert(tx_bsize <= plane_bsize); +#endif // !(CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT) int txb_height = block_size_high[tx_bsize]; int txb_width = block_size_wide[tx_bsize]; const int block_height = block_size_high[plane_bsize]; @@ -1298,7 +1220,12 @@ static unsigned pixel_sse(const AV1_COMP *const cpi, const MACROBLOCKD *xd, &txb_cols, &txb_rows, &visible_cols, &visible_rows); assert(visible_rows > 0); assert(visible_cols > 0); +#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT + if ((txb_rows == visible_rows && txb_cols == visible_cols) && + tx_bsize < BLOCK_SIZES) { +#else if (txb_rows == visible_rows && txb_cols == visible_cols) { +#endif unsigned sse; cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse); return sse; @@ -1533,7 +1460,36 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, if (args->exit_early) return; if (!is_inter_block(mbmi)) { +#if CONFIG_CFL + +#if CONFIG_EC_ADAPT + FRAME_CONTEXT *const ec_ctx = xd->tile_ctx; +#else + FRAME_CONTEXT *const ec_ctx = cm->fc; +#endif // CONFIG_EC_ADAPT + + av1_predict_intra_block_encoder_facade(x, ec_ctx, plane, block, blk_col, + blk_row, tx_size, plane_bsize); +#else av1_predict_intra_block_facade(xd, plane, block, blk_col, blk_row, tx_size); +#endif +#if CONFIG_DPCM_INTRA + const int block_raster_idx = + av1_block_index_to_raster_order(tx_size, block); + const PREDICTION_MODE mode = + (plane == 0) ? get_y_mode(xd->mi[0], block_raster_idx) : mbmi->uv_mode; + TX_TYPE tx_type = get_tx_type((plane == 0) ? 
PLANE_TYPE_Y : PLANE_TYPE_UV, + xd, block, tx_size); + if (av1_use_dpcm_intra(plane, mode, tx_type, mbmi)) { + int8_t skip; + av1_encode_block_intra_dpcm(cm, x, mode, plane, block, blk_row, blk_col, + plane_bsize, tx_size, tx_type, a, l, &skip); + av1_dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col, + tx_size, &this_rd_stats.dist, &this_rd_stats.sse, + OUTPUT_HAS_DECODED_PIXELS); + goto CALCULATE_RD; + } +#endif // CONFIG_DPCM_INTRA av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size); } @@ -1542,8 +1498,7 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, const int coeff_ctx = combine_entropy_contexts(*a, *l); av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, coeff_ctx, AV1_XFORM_QUANT_FP); - if (x->plane[plane].eobs[block] && !xd->lossless[mbmi->segment_id]) - av1_optimize_b(cm, x, plane, block, tx_size, coeff_ctx); + av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l); if (!is_inter_block(mbmi)) { struct macroblock_plane *const p = &x->plane[plane]; @@ -1566,6 +1521,9 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, cfl_store(xd->cfl, dst, dst_stride, blk_row, blk_col, tx_size); } #endif +#if CONFIG_DPCM_INTRA +CALCULATE_RD : {} +#endif // CONFIG_DPCM_INTRA rd = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.dist); if (args->this_rd + rd > args->best_rd) { args->exit_early = 1; @@ -1603,7 +1561,7 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, rd = AOMMIN(rd1, rd2); #if CONFIG_DAALA_DIST - if (plane == 0 && + if (plane == 0 && plane_bsize >= BLOCK_8X8 && (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4)) { this_rd_stats.dist = 0; this_rd_stats.sse = 0; @@ -1641,6 +1599,9 @@ static void block_8x8_rd_txfm_daala_dist(int plane, int block, int blk_row, int use_activity_masking = 0; (void)tx_size; + + assert(plane == 0); + assert(plane_bsize >= BLOCK_8X8); #if CONFIG_PVQ use_activity_masking = x->daala_enc.use_activity_masking; #endif // CONFIG_PVQ @@ -1700,10 +1661,15 @@ static void block_8x8_rd_txfm_daala_dist(int plane, int block, int blk_row, { const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane); + const uint8_t txw_unit = tx_size_wide_unit[tx_size]; + const uint8_t txh_unit = tx_size_high_unit[tx_size]; + const int step = txw_unit * txh_unit; + int offset_h = tx_size_high_unit[TX_4X4]; // The rate of the current 8x8 block is the sum of four 4x4 blocks in it. 
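A sketch of the index arithmetic used just below, under the assumption of square TX_4X4 sub-blocks (txw_unit = txh_unit = 1, so step = 1 and offset_h = 1) and with `block` indexing the bottom-right 4x4 of the 8x8 area; the helper name and `stride` (standing in for max_blocks_wide) are illustrative:

static int sum_8x8_rate_from_4x4(const int *rate_4x4, int block, int stride) {
  return rate_4x4[block - stride - 1] + /* top-left 4x4     */
         rate_4x4[block - stride] +     /* top-right 4x4    */
         rate_4x4[block - 1] +          /* bottom-left 4x4  */
         rate_4x4[block];               /* bottom-right 4x4 */
}
/* With rectangular 4x8/8x4 transforms, step and offset_h take other values,
 * which appears to be why the patch rewrites the offsets in terms of them. */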
- this_rd_stats.rate = x->rate_4x4[block - max_blocks_wide - 1] + - x->rate_4x4[block - max_blocks_wide] + - x->rate_4x4[block - 1] + x->rate_4x4[block]; + this_rd_stats.rate = + x->rate_4x4[block - max_blocks_wide * offset_h - step] + + x->rate_4x4[block - max_blocks_wide * offset_h] + + x->rate_4x4[block - step] + x->rate_4x4[block]; } rd1 = RDCOST(x->rdmult, x->rddiv, this_rd_stats.rate, this_rd_stats.dist); rd2 = RDCOST(x->rdmult, x->rddiv, 0, this_rd_stats.sse); @@ -1740,10 +1706,10 @@ static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi, av1_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left); #if CONFIG_DAALA_DIST - if (plane == 0 && + if (plane == 0 && bsize >= BLOCK_8X8 && (tx_size == TX_4X4 || tx_size == TX_4X8 || tx_size == TX_8X4)) - av1_foreach_8x8_transformed_block_in_plane( - xd, bsize, plane, block_rd_txfm, block_8x8_rd_txfm_daala_dist, &args); + av1_foreach_8x8_transformed_block_in_yplane( + xd, bsize, block_rd_txfm, block_8x8_rd_txfm_daala_dist, &args); else #endif // CONFIG_DAALA_DIST av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm, @@ -1812,7 +1778,12 @@ static int tx_size_cost(const AV1_COMP *const cpi, const MACROBLOCK *const x, const TX_SIZE coded_tx_size = txsize_sqr_up_map[tx_size]; const int depth = tx_size_to_depth(coded_tx_size); const int tx_size_ctx = get_tx_size_context(xd); - const int r_tx_size = cpi->tx_size_cost[tx_size_cat][tx_size_ctx][depth]; + int r_tx_size = cpi->tx_size_cost[tx_size_cat][tx_size_ctx][depth]; +#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT + if (is_quarter_tx_allowed(xd, mbmi, is_inter) && tx_size != coded_tx_size) + r_tx_size += av1_cost_bit(cm->fc->quarter_tx_size_prob, + tx_size == quarter_txsize_lookup[bsize]); +#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT return r_tx_size; } else { return 0; @@ -1924,9 +1895,7 @@ static int skip_txfm_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs, // transforms should be considered for pruning prune = prune_tx_types(cpi, bs, x, xd, -1); -#if CONFIG_REF_MV if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) return 1; -#endif // CONFIG_REF_MV if (FIXED_TX_TYPE && tx_type != get_default_tx_type(0, xd, 0, tx_size)) return 1; if (!is_inter && x->use_default_intra_tx_type && @@ -1960,7 +1929,7 @@ static int skip_txfm_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs, return 0; } -#if CONFIG_EXT_INTER +#if CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT) static int64_t estimate_yrd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bs, MACROBLOCK *x, int *r, int64_t *d, int *s, int64_t *sse, int64_t ref_best_rd) { @@ -1973,7 +1942,7 @@ static int64_t estimate_yrd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bs, *sse = rd_stats.sse; return rd; } -#endif // CONFIG_EXT_INTER +#endif // CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT) static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x, RD_STATS *rd_stats, int64_t ref_best_rd, @@ -2191,9 +2160,7 @@ static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi, #endif TX_TYPE tx_type; for (tx_type = tx_start; tx_type < tx_end; ++tx_type) { -#if CONFIG_REF_MV if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) continue; -#endif // CONFIG_REF_MV const TX_SIZE rect_tx_size = max_txsize_rect_lookup[bs]; RD_STATS this_rd_stats; int ext_tx_set = @@ -2219,6 +2186,56 @@ static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi, #endif // CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4 } } + +#if CONFIG_RECT_TX_EXT + 
// test 1:4/4:1 tx + int evaluate_quarter_tx = 0; + if (is_quarter_tx_allowed(xd, mbmi, is_inter)) { + if (tx_select) { + evaluate_quarter_tx = 1; + } else { + const TX_SIZE chosen_tx_size = + tx_size_from_tx_mode(bs, cm->tx_mode, is_inter); + evaluate_quarter_tx = chosen_tx_size == quarter_txsize_lookup[bs]; + } + } + if (evaluate_quarter_tx) { + TX_TYPE tx_start = DCT_DCT; + TX_TYPE tx_end = TX_TYPES; +#if CONFIG_TXK_SEL + // The tx_type becomes dummy when lv_map is on. The tx_type search will be + // performed in av1_search_txk_type() + tx_end = DCT_DCT + 1; +#endif + TX_TYPE tx_type; + for (tx_type = tx_start; tx_type < tx_end; ++tx_type) { + if (mbmi->ref_mv_idx > 0 && tx_type != DCT_DCT) continue; + const TX_SIZE tx_size = quarter_txsize_lookup[bs]; + RD_STATS this_rd_stats; + int ext_tx_set = + get_ext_tx_set(tx_size, bs, is_inter, cm->reduced_tx_set_used); + if ((is_inter && ext_tx_used_inter[ext_tx_set][tx_type]) || + (!is_inter && ext_tx_used_intra[ext_tx_set][tx_type])) { + rd = + txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, tx_type, tx_size); + if (rd < best_rd) { +#if CONFIG_TXK_SEL + memcpy(best_txk_type, mbmi->txk_type, + sizeof(best_txk_type[0]) * num_blk); +#endif + best_tx_type = tx_type; + best_tx_size = tx_size; + best_rd = rd; + *rd_stats = this_rd_stats; + } + } +#if CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4 + const int is_inter = is_inter_block(mbmi); + if (mbmi->sb_type < BLOCK_8X8 && is_inter) break; +#endif // CONFIG_CB4X4 && !USE_TXTYPE_SEARCH_FOR_SUB8X8_IN_CB4X4 + } + } +#endif // CONFIG_RECT_TX_EXT #endif // CONFIG_EXT_TX && CONFIG_RECT_TX if (tx_select) { @@ -2334,6 +2351,7 @@ static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize, int mode_cost) { MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + assert(!is_inter_block(mbmi)); RD_STATS this_rd_stats; int row, col; int64_t temp_sse, this_rd; @@ -2348,7 +2366,21 @@ static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x, int block = 0; for (row = 0; row < max_blocks_high; row += stepr) { for (col = 0; col < max_blocks_wide; col += stepc) { +#if CONFIG_CFL + const struct macroblockd_plane *const pd = &xd->plane[0]; + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); + +#if CONFIG_EC_ADAPT + FRAME_CONTEXT *const ec_ctx = xd->tile_ctx; +#else + FRAME_CONTEXT *const ec_ctx = cpi->common.fc; +#endif // CONFIG_EC_ADAPT + + av1_predict_intra_block_encoder_facade(x, ec_ctx, 0, block, col, row, + tx_size, plane_bsize); +#else av1_predict_intra_block_facade(xd, 0, block, col, row, tx_size); +#endif block += step; } } @@ -2403,6 +2435,28 @@ static void extend_palette_color_map(uint8_t *const color_map, int orig_width, } } +#if CONFIG_PALETTE_DELTA_ENCODING +// Bias toward using colors in the cache. +// TODO(huisu): Try other schemes to improve compression. 
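A minimal standalone demo (an assumption-laden sketch, not part of the patch) of the cache-snapping rule that optimize_palette_colors() below implements: a k-means centroid lying within 1.5 of a previously signalled (cached) color is replaced by that cached color, which makes the palette cheaper to code under CONFIG_PALETTE_DELTA_ENCODING. The values here are made up for illustration.

#include <stdio.h>

/* Hypothetical demo: cache {10, 50}, centroids {10.8, 48.7, 97.0}.
 * The first two centroids are within 1.5 of a cached color and snap to
 * 10 and 50; the third is left unchanged. */
int main(void) {
  const unsigned short cache[2] = { 10, 50 };
  float centroids[3] = { 10.8f, 48.7f, 97.0f };
  for (int i = 0; i < 3; ++i) {
    float min_diff = 1e30f;
    int idx = 0;
    for (int j = 0; j < 2; ++j) {
      const float diff = centroids[i] > cache[j] ? centroids[i] - cache[j]
                                                 : cache[j] - centroids[i];
      if (diff < min_diff) {
        min_diff = diff;
        idx = j;
      }
    }
    if (min_diff < 1.5f) centroids[i] = cache[idx];
    printf("centroid %d -> %.1f\n", i, centroids[i]); /* 10.0, 50.0, 97.0 */
  }
  return 0;
}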
+static void optimize_palette_colors(uint16_t *color_cache, int n_cache, + int n_colors, int stride, + float *centroids) { + if (n_cache <= 0) return; + for (int i = 0; i < n_colors * stride; i += stride) { + float min_diff = fabsf(centroids[i] - color_cache[0]); + int idx = 0; + for (int j = 1; j < n_cache; ++j) { + float this_diff = fabsf(centroids[i] - color_cache[j]); + if (this_diff < min_diff) { + min_diff = this_diff; + idx = j; + } + } + if (min_diff < 1.5) centroids[i] = color_cache[idx]; + } +} +#endif // CONFIG_PALETTE_DELTA_ENCODING + static int rd_pick_palette_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int palette_ctx, int dc_mode_cost, MB_MODE_INFO *best_mbmi, @@ -2414,6 +2468,7 @@ static int rd_pick_palette_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x, MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mic = xd->mi[0]; MB_MODE_INFO *const mbmi = &mic->mbmi; + assert(!is_inter_block(mbmi)); int this_rate, colors, n; const int src_stride = x->plane[0].src.stride; const uint8_t *const src = x->plane[0].src.buf; @@ -2488,12 +2543,38 @@ static int rd_pick_palette_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x, if (rows * cols > PALETTE_MAX_BLOCK_SIZE) return 0; +#if CONFIG_PALETTE_DELTA_ENCODING + const MODE_INFO *above_mi = xd->above_mi; + const MODE_INFO *left_mi = xd->left_mi; + uint16_t color_cache[2 * PALETTE_MAX_SIZE]; + const int n_cache = + av1_get_palette_cache(above_mi, left_mi, 0, color_cache); +#endif // CONFIG_PALETTE_DELTA_ENCODING + for (n = colors > PALETTE_MAX_SIZE ? PALETTE_MAX_SIZE : colors; n >= 2; --n) { - for (i = 0; i < n; ++i) - centroids[i] = lb + (2 * i + 1) * (ub - lb) / n / 2; - av1_k_means(data, centroids, color_map, rows * cols, n, 1, max_itr); - k = av1_remove_duplicates(centroids, n); + if (colors == PALETTE_MIN_SIZE) { + // Special case: These colors automatically become the centroids. + assert(colors == n); + assert(colors == 2); + centroids[0] = lb; + centroids[1] = ub; + k = 2; + } else { + for (i = 0; i < n; ++i) { + centroids[i] = lb + (2 * i + 1) * (ub - lb) / n / 2; + } + av1_k_means(data, centroids, color_map, rows * cols, n, 1, max_itr); +#if CONFIG_PALETTE_DELTA_ENCODING + optimize_palette_colors(color_cache, n_cache, n, 1, centroids); +#endif // CONFIG_PALETTE_DELTA_ENCODING + k = av1_remove_duplicates(centroids, n); + if (k < PALETTE_MIN_SIZE) { + // Too few unique colors to create a palette. And DC_PRED will work + // well for that case anyway. So skip. + continue; + } + } #if CONFIG_HIGHBITDEPTH if (cpi->common.use_highbitdepth) @@ -2516,7 +2597,11 @@ static int rd_pick_palette_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x, av1_cost_bit( av1_default_palette_y_mode_prob[bsize - BLOCK_8X8][palette_ctx], 1); - palette_mode_cost += av1_palette_color_cost_y(pmi, cpi->common.bit_depth); + palette_mode_cost += av1_palette_color_cost_y(pmi, +#if CONFIG_PALETTE_DELTA_ENCODING + color_cache, n_cache, +#endif // CONFIG_PALETTE_DELTA_ENCODING + cpi->common.bit_depth); for (i = 0; i < rows; ++i) { for (j = (i == 0 ? 
1 : 0); j < cols; ++j) { int color_idx; @@ -2570,6 +2655,7 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode( const AV1_COMMON *const cm = &cpi->common; PREDICTION_MODE mode; MACROBLOCKD *const xd = &x->e_mbd; + assert(!is_inter_block(&xd->mi[0]->mbmi)); int64_t best_rd = rd_thresh; struct macroblock_plane *p = &x->plane[0]; struct macroblockd_plane *pd = &xd->plane[0]; @@ -2577,7 +2663,7 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode( const int dst_stride = pd->dst.stride; const uint8_t *src_init = &p->src.buf[row * 4 * src_stride + col * 4]; uint8_t *dst_init = &pd->dst.buf[row * 4 * dst_stride + col * 4]; -#if CONFIG_CB4X4 +#if CONFIG_CHROMA_2X2 // TODO(jingning): This is a temporal change. The whole function should be // out when cb4x4 is enabled. ENTROPY_CONTEXT ta[4], tempa[4]; @@ -2585,7 +2671,7 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode( #else ENTROPY_CONTEXT ta[2], tempa[2]; ENTROPY_CONTEXT tl[2], templ[2]; -#endif // CONFIG_CB4X4 +#endif // CONFIG_CHROMA_2X2 const int pred_width_in_4x4_blocks = num_4x4_blocks_wide_lookup[bsize]; const int pred_height_in_4x4_blocks = num_4x4_blocks_high_lookup[bsize]; @@ -2738,7 +2824,8 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode( #if !CONFIG_PVQ av1_xform_quant(cm, x, 0, block, row + idy, col + idx, BLOCK_8X8, tx_size, coeff_ctx, AV1_XFORM_QUANT_FP); - av1_optimize_b(cm, x, 0, block, tx_size, coeff_ctx); + av1_optimize_b(cm, x, 0, block, BLOCK_8X8, tx_size, tempa + idx, + templ + idy); ratey += av1_cost_coeffs(cpi, x, 0, block, tx_size, scan_order, tempa + idx, templ + idy, cpi->sf.use_fast_coef_costing); @@ -2897,9 +2984,8 @@ static int64_t rd_pick_intra_sub_8x8_y_subblock_mode( #endif // CONFIG_CB4X4 BLOCK_8X8, tx_size, coeff_ctx, xform_quant); - if (!is_lossless) { - av1_optimize_b(cm, x, 0, block, tx_size, coeff_ctx); - } + av1_optimize_b(cm, x, 0, block, BLOCK_8X8, tx_size, tempa + idx, + templ + idy); ratey += av1_cost_coeffs(cpi, x, 0, block, tx_size, scan_order, tempa + idx, @@ -3013,6 +3099,7 @@ static int64_t rd_pick_intra_sub_8x8_y_mode(const AV1_COMP *const cpi, const MODE_INFO *above_mi = xd->above_mi; const MODE_INFO *left_mi = xd->left_mi; MB_MODE_INFO *const mbmi = &mic->mbmi; + assert(!is_inter_block(mbmi)); const BLOCK_SIZE bsize = mbmi->sb_type; const int pred_width_in_4x4_blocks = num_4x4_blocks_wide_lookup[bsize]; const int pred_height_in_4x4_blocks = num_4x4_blocks_high_lookup[bsize]; @@ -3220,6 +3307,7 @@ static int64_t calc_rd_given_intra_angle( RD_STATS tokenonly_rd_stats; int64_t this_rd, this_model_rd; MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi; + assert(!is_inter_block(mbmi)); mbmi->angle_delta[0] = angle_delta; this_model_rd = intra_model_yrd(cpi, x, bsize, mode_cost); @@ -3261,6 +3349,7 @@ static int64_t rd_pick_intra_angle_sby(const AV1_COMP *const cpi, MACROBLOCK *x, MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mic = xd->mi[0]; MB_MODE_INFO *mbmi = &mic->mbmi; + assert(!is_inter_block(mbmi)); int i, angle_delta, best_angle_delta = 0; int first_try = 1; #if CONFIG_INTRA_INTERP @@ -3393,32 +3482,40 @@ static const uint8_t gradient_to_angle_bin[2][7][16] = { }, }; +/* clang-format off */ static const uint8_t mode_to_angle_bin[INTRA_MODES] = { 0, 2, 6, 0, 4, 3, 5, 7, 1, 0, +#if CONFIG_ALT_INTRA + 0, +#endif // CONFIG_ALT_INTRA }; +/* clang-format on */ static void angle_estimation(const uint8_t *src, int src_stride, int rows, - int cols, uint8_t *directional_mode_skip_mask) { - int i, r, c, index, dx, dy, temp, sn, remd, quot; + int cols, BLOCK_SIZE bsize, + uint8_t 
*directional_mode_skip_mask) { + memset(directional_mode_skip_mask, 0, + INTRA_MODES * sizeof(*directional_mode_skip_mask)); + // Sub-8x8 blocks do not use extra directions. + if (bsize < BLOCK_8X8) return; uint64_t hist[DIRECTIONAL_MODES]; - uint64_t hist_sum = 0; - memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0])); src += src_stride; + int r, c, dx, dy; for (r = 1; r < rows; ++r) { for (c = 1; c < cols; ++c) { dx = src[c] - src[c - 1]; dy = src[c] - src[c - src_stride]; - temp = dx * dx + dy * dy; + int index; + const int temp = dx * dx + dy * dy; if (dy == 0) { index = 2; } else { - sn = (dx > 0) ^ (dy > 0); + const int sn = (dx > 0) ^ (dy > 0); dx = abs(dx); dy = abs(dy); - remd = dx % dy; - quot = dx / dy; - remd = remd * 16 / dy; + const int remd = (dx % dy) * 16 / dy; + const int quot = dx / dy; index = gradient_to_angle_bin[sn][AOMMIN(quot, 6)][AOMMIN(remd, 15)]; } hist[index] += temp; @@ -3426,9 +3523,11 @@ static void angle_estimation(const uint8_t *src, int src_stride, int rows, src += src_stride; } + int i; + uint64_t hist_sum = 0; for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i]; for (i = 0; i < INTRA_MODES; ++i) { - if (i != DC_PRED && i != TM_PRED) { + if (av1_is_directional_mode(i, bsize)) { const uint8_t angle_bin = mode_to_angle_bin[i]; uint64_t score = 2 * hist[angle_bin]; int weight = 2; @@ -3448,29 +3547,31 @@ static void angle_estimation(const uint8_t *src, int src_stride, int rows, #if CONFIG_HIGHBITDEPTH static void highbd_angle_estimation(const uint8_t *src8, int src_stride, - int rows, int cols, + int rows, int cols, BLOCK_SIZE bsize, uint8_t *directional_mode_skip_mask) { - int i, r, c, index, dx, dy, temp, sn, remd, quot; - uint64_t hist[DIRECTIONAL_MODES]; - uint64_t hist_sum = 0; + memset(directional_mode_skip_mask, 0, + INTRA_MODES * sizeof(*directional_mode_skip_mask)); + // Sub-8x8 blocks do not use extra directions. 
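An illustrative standalone sketch (not part of the patch) of how angle_estimation()/highbd_angle_estimation() map one gradient sample to a histogram bin: the bin depends on whether dx and dy have opposite signs and on the ratio dx/dy split into an integer part (quot) and a 1/16-resolution fractional part (remd); the squared gradient magnitude dx*dx + dy*dy is then accumulated into that bin. Only the indexing arithmetic is reproduced here, with the bin table standing in for gradient_to_angle_bin defined above; the helper name is hypothetical.

#include <stdlib.h>

/* Hypothetical helper, for illustration only. */
int angle_bin_for_gradient(int dx, int dy,
                           const unsigned char bin[2][7][16]) {
  if (dy == 0) return 2; /* purely horizontal gradient */
  const int sn = (dx > 0) ^ (dy > 0); /* do dx and dy have opposite signs? */
  dx = abs(dx);
  dy = abs(dy);
  const int quot = dx / dy;             /* integer part of dx/dy */
  const int remd = (dx % dy) * 16 / dy; /* fractional part, in sixteenths */
  return bin[sn][quot < 6 ? quot : 6][remd < 15 ? remd : 15];
}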
+ if (bsize < BLOCK_8X8) return; uint16_t *src = CONVERT_TO_SHORTPTR(src8); - + uint64_t hist[DIRECTIONAL_MODES]; memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0])); src += src_stride; + int r, c, dx, dy; for (r = 1; r < rows; ++r) { for (c = 1; c < cols; ++c) { dx = src[c] - src[c - 1]; dy = src[c] - src[c - src_stride]; - temp = dx * dx + dy * dy; + int index; + const int temp = dx * dx + dy * dy; if (dy == 0) { index = 2; } else { - sn = (dx > 0) ^ (dy > 0); + const int sn = (dx > 0) ^ (dy > 0); dx = abs(dx); dy = abs(dy); - remd = dx % dy; - quot = dx / dy; - remd = remd * 16 / dy; + const int remd = (dx % dy) * 16 / dy; + const int quot = dx / dy; index = gradient_to_angle_bin[sn][AOMMIN(quot, 6)][AOMMIN(remd, 15)]; } hist[index] += temp; @@ -3478,9 +3579,11 @@ static void highbd_angle_estimation(const uint8_t *src8, int src_stride, src += src_stride; } + int i; + uint64_t hist_sum = 0; for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i]; for (i = 0; i < INTRA_MODES; ++i) { - if (i != DC_PRED && i != TM_PRED) { + if (av1_is_directional_mode(i, bsize)) { const uint8_t angle_bin = mode_to_angle_bin[i]; uint64_t score = 2 * hist[angle_bin]; int weight = 2; @@ -3509,6 +3612,7 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x, MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mic = xd->mi[0]; MB_MODE_INFO *const mbmi = &mic->mbmi; + assert(!is_inter_block(mbmi)); MB_MODE_INFO best_mbmi = *mbmi; int64_t best_model_rd = INT64_MAX; #if CONFIG_EXT_INTRA @@ -3552,15 +3656,14 @@ static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x, #if CONFIG_EXT_INTRA mbmi->angle_delta[0] = 0; - memset(directional_mode_skip_mask, 0, - sizeof(directional_mode_skip_mask[0]) * INTRA_MODES); #if CONFIG_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) - highbd_angle_estimation(src, src_stride, rows, cols, + highbd_angle_estimation(src, src_stride, rows, cols, bsize, directional_mode_skip_mask); else #endif // CONFIG_HIGHBITDEPTH - angle_estimation(src, src_stride, rows, cols, directional_mode_skip_mask); + angle_estimation(src, src_stride, rows, cols, bsize, + directional_mode_skip_mask); #endif // CONFIG_EXT_INTRA #if CONFIG_FILTER_INTRA mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0; @@ -3833,7 +3936,7 @@ void av1_tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, coeff_ctx, AV1_XFORM_QUANT_FP); - av1_optimize_b(cm, x, plane, block, tx_size, coeff_ctx); + av1_optimize_b(cm, x, plane, block, plane_bsize, tx_size, a, l); // TODO(any): Use av1_dist_block to compute distortion #if CONFIG_HIGHBITDEPTH @@ -3936,9 +4039,8 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, ENTROPY_CONTEXT *pta = ta + blk_col; ENTROPY_CONTEXT *ptl = tl + blk_row; int coeff_ctx, i; - int ctx = - txfm_partition_context(tx_above + (blk_col >> 1), - tx_left + (blk_row >> 1), mbmi->sb_type, tx_size); + int ctx = txfm_partition_context(tx_above + blk_col, tx_left + blk_row, + mbmi->sb_type, tx_size); int64_t sum_rd = INT64_MAX; int tmp_eob = 0; int zero_blk_rate; @@ -4042,8 +4144,8 @@ static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, int idx, idy; for (i = 0; i < tx_size_wide_unit[tx_size]; ++i) pta[i] = !(tmp_eob == 0); for (i = 0; i < tx_size_high_unit[tx_size]; ++i) ptl[i] = !(tmp_eob == 0); - txfm_partition_update(tx_above + (blk_col >> 1), tx_left + (blk_row >> 1), - tx_size, tx_size); + 
txfm_partition_update(tx_above + blk_col, tx_left + blk_row, tx_size, + tx_size); inter_tx_size[0][0] = tx_size; for (idy = 0; idy < tx_size_high_unit[tx_size] / 2; ++idy) for (idx = 0; idx < tx_size_wide_unit[tx_size] / 2; ++idx) @@ -4082,17 +4184,15 @@ static void inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x, int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size]; ENTROPY_CONTEXT ctxa[2 * MAX_MIB_SIZE]; ENTROPY_CONTEXT ctxl[2 * MAX_MIB_SIZE]; - TXFM_CONTEXT tx_above[MAX_MIB_SIZE]; - TXFM_CONTEXT tx_left[MAX_MIB_SIZE]; + TXFM_CONTEXT tx_above[MAX_MIB_SIZE * 2]; + TXFM_CONTEXT tx_left[MAX_MIB_SIZE * 2]; RD_STATS pn_rd_stats; av1_init_rd_stats(&pn_rd_stats); av1_get_entropy_contexts(bsize, 0, pd, ctxa, ctxl); - memcpy(tx_above, xd->above_txfm_context, - sizeof(TXFM_CONTEXT) * (mi_width >> 1)); - memcpy(tx_left, xd->left_txfm_context, - sizeof(TXFM_CONTEXT) * (mi_height >> 1)); + memcpy(tx_above, xd->above_txfm_context, sizeof(TXFM_CONTEXT) * mi_width); + memcpy(tx_left, xd->left_txfm_context, sizeof(TXFM_CONTEXT) * mi_height); for (idy = 0; idy < mi_height; idy += bh) { for (idx = 0; idx < mi_width; idx += bw) { @@ -4137,8 +4237,8 @@ static int64_t select_tx_size_fix_type(const AV1_COMP *cpi, MACROBLOCK *x, const int max_blocks_wide = max_block_wide(xd, bsize, 0); mbmi->tx_type = tx_type; - mbmi->min_tx_size = TX_SIZES_ALL; inter_block_yrd(cpi, x, rd_stats, bsize, ref_best_rd, rd_stats_stack); + mbmi->min_tx_size = get_min_tx_size(mbmi->inter_tx_size[0][0]); if (rd_stats->rate == INT_MAX) return INT64_MAX; @@ -4350,7 +4450,8 @@ static int inter_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x, #if CONFIG_CB4X4 && !CONFIG_CHROMA_2X2 if (x->skip_chroma_rd) return is_cost_valid; - bsize = AOMMAX(BLOCK_8X8, bsize); + bsize = scale_chroma_bsize(mbmi->sb_type, xd->plane[1].subsampling_x, + xd->plane[1].subsampling_y); #endif // CONFIG_CB4X4 && !CONFIG_CHROMA_2X2 #if CONFIG_EXT_TX && CONFIG_RECT_TX @@ -4426,6 +4527,7 @@ static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x, int *skippable) { MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; + assert(!is_inter_block(mbmi)); PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; const BLOCK_SIZE bsize = mbmi->sb_type; int this_rate; @@ -4460,6 +4562,13 @@ static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x, } #endif // CONFIG_HIGHBITDEPTH +#if CONFIG_PALETTE_DELTA_ENCODING + const MODE_INFO *above_mi = xd->above_mi; + const MODE_INFO *left_mi = xd->left_mi; + uint16_t color_cache[2 * PALETTE_MAX_SIZE]; + const int n_cache = av1_get_palette_cache(above_mi, left_mi, 1, color_cache); +#endif // CONFIG_PALETTE_DELTA_ENCODING + colors = colors_u > colors_v ? colors_u : colors_v; if (colors > 1 && colors <= 64) { int r, c, n, i, j; @@ -4524,6 +4633,7 @@ static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x, } av1_k_means(data, centroids, color_map, rows * cols, n, 2, max_itr); #if CONFIG_PALETTE_DELTA_ENCODING + optimize_palette_colors(color_cache, n_cache, n, 2, centroids); // Sort the U channel colors in ascending order. 
for (i = 0; i < 2 * (n - 1); i += 2) { int min_idx = i; @@ -4563,7 +4673,11 @@ static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x, write_uniform_cost(n, color_map[0]) + av1_cost_bit( av1_default_palette_uv_mode_prob[pmi->palette_size[0] > 0], 1); - this_rate += av1_palette_color_cost_uv(pmi, cpi->common.bit_depth); + this_rate += av1_palette_color_cost_uv(pmi, +#if CONFIG_PALETTE_DELTA_ENCODING + color_cache, n_cache, +#endif // CONFIG_PALETTE_DELTA_ENCODING + cpi->common.bit_depth); for (i = 0; i < rows; ++i) { for (j = (i == 0 ? 1 : 0); j < cols; ++j) { int color_idx; @@ -4660,6 +4774,7 @@ static int64_t pick_intra_angle_routine_sbuv( int rate_overhead, int64_t best_rd_in, int *rate, RD_STATS *rd_stats, int *best_angle_delta, int64_t *best_rd) { MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi; + assert(!is_inter_block(mbmi)); int this_rate; int64_t this_rd; RD_STATS tokenonly_rd_stats; @@ -4687,6 +4802,7 @@ static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x, RD_STATS *rd_stats) { MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + assert(!is_inter_block(mbmi)); int i, angle_delta, best_angle_delta = 0; int64_t this_rd, best_rd_in, rd_cost[2 * (MAX_ANGLE_DELTA + 2)]; @@ -4736,12 +4852,23 @@ static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x, } #endif // CONFIG_EXT_INTRA +static void init_sbuv_mode(MB_MODE_INFO *const mbmi) { + mbmi->uv_mode = DC_PRED; +#if CONFIG_PALETTE + mbmi->palette_mode_info.palette_size[1] = 0; +#endif // CONFIG_PALETTE +#if CONFIG_FILTER_INTRA + mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0; +#endif // CONFIG_FILTER_INTRA +} + static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable, BLOCK_SIZE bsize, TX_SIZE max_tx_size) { MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + assert(!is_inter_block(mbmi)); MB_MODE_INFO best_mbmi = *mbmi; PREDICTION_MODE mode; int64_t best_rd = INT64_MAX, this_rd; @@ -4756,12 +4883,6 @@ static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x, uint8_t *best_palette_color_map = NULL; #endif // CONFIG_PALETTE -#if CONFIG_FILTER_INTRA - mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0; -#endif // CONFIG_FILTER_INTRA -#if CONFIG_PALETTE - pmi->palette_size[1] = 0; -#endif // CONFIG_PALETTE for (mode = DC_PRED; mode <= TM_PRED; ++mode) { #if CONFIG_EXT_INTRA const int is_directional_mode = @@ -4858,12 +4979,12 @@ static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x, // Use an estimated rd for uv_intra based on DC_PRED if the // appropriate speed flag is set. 
(void)ctx; + init_sbuv_mode(&x->e_mbd.mi[0]->mbmi); #if CONFIG_CB4X4 #if CONFIG_CHROMA_2X2 rd_pick_intra_sbuv_mode(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv, bsize, max_tx_size); #else - max_tx_size = AOMMAX(max_tx_size, TX_4X4); if (x->skip_chroma_rd) { *rate_uv = 0; *rate_uv_tokenonly = 0; @@ -4893,7 +5014,6 @@ static int cost_mv_ref(const AV1_COMP *const cpi, PREDICTION_MODE mode, } #endif -#if CONFIG_REF_MV int mode_cost = 0; int16_t mode_ctx = mode_context & NEWMV_CTX_MASK; int16_t is_all_zero_mv = mode_context & (1 << ALL_ZERO_FLAG_OFFSET); @@ -4924,13 +5044,9 @@ static int cost_mv_ref(const AV1_COMP *const cpi, PREDICTION_MODE mode, return mode_cost; } } -#else - assert(is_inter_mode(mode)); - return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)]; -#endif // CONFIG_REF_MV } -#if CONFIG_EXT_INTER +#if CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT) static int get_interinter_compound_type_bits(BLOCK_SIZE bsize, COMPOUND_TYPE comp_type) { (void)bsize; @@ -4945,304 +5061,7 @@ static int get_interinter_compound_type_bits(BLOCK_SIZE bsize, default: assert(0); return 0; } } -#endif // CONFIG_EXT_INTER - -static int set_and_cost_bmi_mvs( - const AV1_COMP *const cpi, MACROBLOCK *x, MACROBLOCKD *xd, int i, - PREDICTION_MODE mode, int_mv this_mv[2], - int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME], - int_mv seg_mvs[TOTAL_REFS_PER_FRAME], -#if CONFIG_EXT_INTER - int_mv compound_seg_newmvs[2], -#endif // CONFIG_EXT_INTER - int_mv *best_ref_mv[2], const int *mvjcost, int *mvcost[2], int mi_row, - int mi_col) { - MODE_INFO *const mic = xd->mi[0]; - const MB_MODE_INFO *const mbmi = &mic->mbmi; - const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; - int thismvcost = 0; - int idx, idy; - const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mbmi->sb_type]; - const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mbmi->sb_type]; - const int is_compound = has_second_ref(mbmi); - int mode_ctx; - (void)mi_row; - (void)mi_col; - - switch (mode) { - case NEWMV: this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int; -#if CONFIG_EXT_INTER - if (!cpi->common.allow_high_precision_mv) - lower_mv_precision(&this_mv[0].as_mv, 0); -#endif // CONFIG_EXT_INTER - -#if CONFIG_REF_MV - for (idx = 0; idx < 1 + is_compound; ++idx) { - this_mv[idx] = seg_mvs[mbmi->ref_frame[idx]]; - av1_set_mvcost(x, mbmi->ref_frame[idx], idx, mbmi->ref_mv_idx); - thismvcost += - av1_mv_bit_cost(&this_mv[idx].as_mv, &best_ref_mv[idx]->as_mv, - x->nmvjointcost, x->mvcost, MV_COST_WEIGHT_SUB); - } - (void)mvjcost; - (void)mvcost; -#else - thismvcost += av1_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv, - mvjcost, mvcost, MV_COST_WEIGHT_SUB); -#if !CONFIG_EXT_INTER - if (is_compound) { - this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int; - thismvcost += av1_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv, - mvjcost, mvcost, MV_COST_WEIGHT_SUB); - } -#endif // !CONFIG_EXT_INTER -#endif // CONFIG_REF_MV - break; - case NEARMV: - case NEARESTMV: - this_mv[0].as_int = frame_mv[mode][mbmi->ref_frame[0]].as_int; - if (is_compound) - this_mv[1].as_int = frame_mv[mode][mbmi->ref_frame[1]].as_int; - break; - case ZEROMV: { - int ref; - for (ref = 0; ref < 1 + is_compound; ++ref) { -#if CONFIG_GLOBAL_MOTION - this_mv[ref].as_int = - gm_get_motion_vector( - &cpi->common.global_motion[mbmi->ref_frame[ref]], - cpi->common.allow_high_precision_mv, mbmi->sb_type, mi_col, - mi_row, i) - .as_int; -#else - this_mv[ref].as_int = 0; -#endif // CONFIG_GLOBAL_MOTION - } - break; - } -#if 
CONFIG_EXT_INTER - case NEW_NEWMV: - if (compound_seg_newmvs[0].as_int == INVALID_MV || - compound_seg_newmvs[1].as_int == INVALID_MV) { - this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int; - this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int; - } else { - this_mv[0].as_int = compound_seg_newmvs[0].as_int; - this_mv[1].as_int = compound_seg_newmvs[1].as_int; - } - if (!cpi->common.allow_high_precision_mv) - lower_mv_precision(&this_mv[0].as_mv, 0); - if (!cpi->common.allow_high_precision_mv) - lower_mv_precision(&this_mv[1].as_mv, 0); -#if CONFIG_REF_MV - av1_set_mvcost(x, mbmi->ref_frame[0], 0, mbmi->ref_mv_idx); -#endif - thismvcost += av1_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv, - mvjcost, mvcost, MV_COST_WEIGHT_SUB); -#if CONFIG_REF_MV - av1_set_mvcost(x, mbmi->ref_frame[1], 1, mbmi->ref_mv_idx); -#endif - thismvcost += av1_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv, - mvjcost, mvcost, MV_COST_WEIGHT_SUB); - break; - case NEW_NEARMV: - case NEW_NEARESTMV: - this_mv[0].as_int = seg_mvs[mbmi->ref_frame[0]].as_int; - if (!cpi->common.allow_high_precision_mv) - lower_mv_precision(&this_mv[0].as_mv, 0); -#if CONFIG_REF_MV - av1_set_mvcost(x, mbmi->ref_frame[0], 0, mbmi->ref_mv_idx); -#endif - thismvcost += av1_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv, - mvjcost, mvcost, MV_COST_WEIGHT_SUB); - this_mv[1].as_int = frame_mv[mode][mbmi->ref_frame[1]].as_int; - break; - case NEAR_NEWMV: - case NEAREST_NEWMV: - this_mv[0].as_int = frame_mv[mode][mbmi->ref_frame[0]].as_int; - this_mv[1].as_int = seg_mvs[mbmi->ref_frame[1]].as_int; - if (!cpi->common.allow_high_precision_mv) - lower_mv_precision(&this_mv[1].as_mv, 0); -#if CONFIG_REF_MV - av1_set_mvcost(x, mbmi->ref_frame[1], 1, mbmi->ref_mv_idx); -#endif - thismvcost += av1_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv, - mvjcost, mvcost, MV_COST_WEIGHT_SUB); - break; - case NEAREST_NEARMV: - case NEAR_NEARESTMV: - case NEAREST_NEARESTMV: - case NEAR_NEARMV: - this_mv[0].as_int = frame_mv[mode][mbmi->ref_frame[0]].as_int; - this_mv[1].as_int = frame_mv[mode][mbmi->ref_frame[1]].as_int; - break; - case ZERO_ZEROMV: -#if CONFIG_GLOBAL_MOTION - this_mv[0].as_int = - gm_get_motion_vector(&cpi->common.global_motion[mbmi->ref_frame[0]], - cpi->common.allow_high_precision_mv, - mbmi->sb_type, mi_col, mi_row, i) - .as_int; - this_mv[1].as_int = - gm_get_motion_vector(&cpi->common.global_motion[mbmi->ref_frame[1]], - cpi->common.allow_high_precision_mv, - mbmi->sb_type, mi_col, mi_row, i) - .as_int; -#else - this_mv[0].as_int = 0; - this_mv[1].as_int = 0; -#endif // CONFIG_GLOBAL_MOTION - break; -#endif // CONFIG_EXT_INTER - default: break; - } - - mic->bmi[i].as_mv[0].as_int = this_mv[0].as_int; - if (is_compound) mic->bmi[i].as_mv[1].as_int = this_mv[1].as_int; - - mic->bmi[i].as_mode = mode; - -#if CONFIG_REF_MV - if (mode == NEWMV) { - mic->bmi[i].pred_mv[0].as_int = - mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0].as_int; - if (is_compound) - mic->bmi[i].pred_mv[1].as_int = - mbmi_ext->ref_mvs[mbmi->ref_frame[1]][0].as_int; - } else { - mic->bmi[i].pred_mv[0].as_int = this_mv[0].as_int; - if (is_compound) mic->bmi[i].pred_mv[1].as_int = this_mv[1].as_int; - } -#endif // CONFIG_REF_MV - - for (idy = 0; idy < num_4x4_blocks_high; ++idy) - for (idx = 0; idx < num_4x4_blocks_wide; ++idx) - memmove(&mic->bmi[i + idy * 2 + idx], &mic->bmi[i], sizeof(mic->bmi[i])); - -#if CONFIG_REF_MV -#if CONFIG_EXT_INTER - if (is_compound) - mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]]; - else -#endif // 
CONFIG_EXT_INTER - mode_ctx = av1_mode_context_analyzer(mbmi_ext->mode_context, - mbmi->ref_frame, mbmi->sb_type, i); -#else // CONFIG_REF_MV - mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]]; -#endif // CONFIG_REF_MV - return cost_mv_ref(cpi, mode, mode_ctx) + thismvcost; -} - -static int64_t encode_inter_mb_segment_sub8x8( - const AV1_COMP *const cpi, MACROBLOCK *x, int64_t best_yrd, int i, - int *labelyrate, int64_t *distortion, int64_t *sse, ENTROPY_CONTEXT *ta, - ENTROPY_CONTEXT *tl, int ir, int ic, int mi_row, int mi_col) { - const AV1_COMMON *const cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - struct macroblockd_plane *const pd = &xd->plane[0]; - struct macroblock_plane *const p = &x->plane[0]; - MODE_INFO *const mi = xd->mi[0]; - const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd); - const int txb_width = max_block_wide(xd, plane_bsize, 0); - const int txb_height = max_block_high(xd, plane_bsize, 0); - const int width = block_size_wide[plane_bsize]; - const int height = block_size_high[plane_bsize]; - int idx, idy; - const uint8_t *const src = - &p->src.buf[av1_raster_block_offset(BLOCK_8X8, i, p->src.stride)]; - uint8_t *const dst = - &pd->dst.buf[av1_raster_block_offset(BLOCK_8X8, i, pd->dst.stride)]; - int64_t thisdistortion = 0, thissse = 0; - int thisrate = 0; - TX_SIZE tx_size = mi->mbmi.tx_size; - TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, i, tx_size); - const int num_4x4_w = tx_size_wide_unit[tx_size]; - const int num_4x4_h = tx_size_high_unit[tx_size]; -#if !CONFIG_PVQ - const SCAN_ORDER *scan_order = get_scan(cm, tx_size, tx_type, 1); -#else - (void)cpi; - (void)ta; - (void)tl; - (void)tx_type; -#endif // !CONFIG_PVQ - -#if CONFIG_EXT_TX && CONFIG_RECT_TX - assert(IMPLIES(xd->lossless[mi->mbmi.segment_id], tx_size == TX_4X4)); - assert(IMPLIES(!xd->lossless[mi->mbmi.segment_id], - tx_size == max_txsize_rect_lookup[mi->mbmi.sb_type])); -#else - assert(tx_size == TX_4X4); -#endif // CONFIG_EXT_TX && CONFIG_RECT_TX - - assert(tx_type == DCT_DCT); - - av1_build_inter_predictor_sub8x8(xd, 0, i, ir, ic, mi_row, mi_col); - -#if CONFIG_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - aom_highbd_subtract_block( - height, width, av1_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), - 8, src, p->src.stride, dst, pd->dst.stride, xd->bd); - } else { - aom_subtract_block(height, width, - av1_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), - 8, src, p->src.stride, dst, pd->dst.stride); - } -#else - aom_subtract_block(height, width, - av1_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), - 8, src, p->src.stride, dst, pd->dst.stride); -#endif // CONFIG_HIGHBITDEPTH - - for (idy = 0; idy < txb_height; idy += num_4x4_h) { - for (idx = 0; idx < txb_width; idx += num_4x4_w) { - int64_t dist, ssz, rd, rd1, rd2; - int coeff_ctx; - const int k = i + (idy * 2 + idx); - const int block = av1_raster_order_to_block_index(tx_size, k); - assert(IMPLIES(tx_size == TX_4X8 || tx_size == TX_8X4, - idx == 0 && idy == 0)); - coeff_ctx = combine_entropy_contexts(*(ta + (k & 1)), *(tl + (k >> 1))); - av1_xform_quant(cm, x, 0, block, idy + (i >> 1), idx + (i & 0x01), - BLOCK_8X8, tx_size, coeff_ctx, AV1_XFORM_QUANT_FP); - if (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0) - av1_optimize_b(cm, x, 0, block, tx_size, coeff_ctx); - av1_dist_block(cpi, x, 0, BLOCK_8X8, block, idy + (i >> 1), - idx + (i & 0x1), tx_size, &dist, &ssz, - OUTPUT_HAS_PREDICTED_PIXELS); - thisdistortion += dist; - thissse += ssz; -#if !CONFIG_PVQ - thisrate += - 
av1_cost_coeffs(cpi, x, 0, block, tx_size, scan_order, (ta + (k & 1)), - (tl + (k >> 1)), cpi->sf.use_fast_coef_costing); - *(ta + (k & 1)) = !(p->eobs[block] == 0); - *(tl + (k >> 1)) = !(p->eobs[block] == 0); -#else - thisrate += x->rate; -#endif // !CONFIG_PVQ -#if CONFIG_EXT_TX - if (tx_size == TX_8X4) { - *(ta + (k & 1) + 1) = *(ta + (k & 1)); - } - if (tx_size == TX_4X8) { - *(tl + (k >> 1) + 1) = *(tl + (k >> 1)); - } -#endif // CONFIG_EXT_TX - rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion); - rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse); - rd = AOMMIN(rd1, rd2); - if (rd >= best_yrd) return INT64_MAX; - } - } - - *distortion = thisdistortion; - *labelyrate = thisrate; - *sse = thissse; - - return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion); -} +#endif // CONFIG_EXT_INTER && (CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT) typedef struct { int eobs; @@ -5252,20 +5071,18 @@ typedef struct { int64_t bsse; int64_t brdcost; int_mv mvs[2]; -#if CONFIG_REF_MV int_mv pred_mv[2]; -#endif // CONFIG_REF_MV #if CONFIG_EXT_INTER int_mv ref_mv[2]; #endif // CONFIG_EXT_INTER -#if CONFIG_CB4X4 +#if CONFIG_CHROMA_2X2 ENTROPY_CONTEXT ta[4]; ENTROPY_CONTEXT tl[4]; #else ENTROPY_CONTEXT ta[2]; ENTROPY_CONTEXT tl[2]; -#endif // CONFIG_CB4X4 +#endif // CONFIG_CHROMA_2X2 } SEG_RDSTAT; typedef struct { @@ -5293,37 +5110,13 @@ static INLINE int mv_check_bounds(const MvLimits *mv_limits, const MV *mv) { (mv->col >> 3) > mv_limits->col_max; } -static INLINE void mi_buf_shift(MACROBLOCK *x, int i) { - MB_MODE_INFO *const mbmi = &x->e_mbd.mi[0]->mbmi; - struct macroblock_plane *const p = &x->plane[0]; - struct macroblockd_plane *const pd = &x->e_mbd.plane[0]; - - p->src.buf = - &p->src.buf[av1_raster_block_offset(BLOCK_8X8, i, p->src.stride)]; - assert(((intptr_t)pd->pre[0].buf & 0x7) == 0); - pd->pre[0].buf = - &pd->pre[0].buf[av1_raster_block_offset(BLOCK_8X8, i, pd->pre[0].stride)]; - if (has_second_ref(mbmi)) - pd->pre[1].buf = - &pd->pre[1] - .buf[av1_raster_block_offset(BLOCK_8X8, i, pd->pre[1].stride)]; -} - -static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src, - struct buf_2d orig_pre[2]) { - MB_MODE_INFO *mbmi = &x->e_mbd.mi[0]->mbmi; - x->plane[0].src = orig_src; - x->e_mbd.plane[0].pre[0] = orig_pre[0]; - if (has_second_ref(mbmi)) x->e_mbd.plane[0].pre[1] = orig_pre[1]; -} - // Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way encode zero motion. 
// TODO(aconverse): Find out if this is still productive then clean up or remove static int check_best_zero_mv( const AV1_COMP *const cpi, const int16_t mode_context[TOTAL_REFS_PER_FRAME], -#if CONFIG_REF_MV && CONFIG_EXT_INTER +#if CONFIG_EXT_INTER const int16_t compound_mode_context[TOTAL_REFS_PER_FRAME], -#endif // CONFIG_REF_MV && CONFIG_EXT_INTER +#endif // CONFIG_EXT_INTER int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME], int this_mode, const MV_REFERENCE_FRAME ref_frames[2], const BLOCK_SIZE bsize, int block, int mi_row, int mi_col) { @@ -5355,21 +5148,12 @@ static int check_best_zero_mv( frame_mv[this_mode][ref_frames[0]].as_int == zeromv[0].as_int && (ref_frames[1] <= INTRA_FRAME || frame_mv[this_mode][ref_frames[1]].as_int == zeromv[1].as_int)) { -#if CONFIG_REF_MV int16_t rfc = av1_mode_context_analyzer(mode_context, ref_frames, bsize, block); -#else - int16_t rfc = mode_context[ref_frames[0]]; -#endif // CONFIG_REF_MV int c1 = cost_mv_ref(cpi, NEARMV, rfc); int c2 = cost_mv_ref(cpi, NEARESTMV, rfc); int c3 = cost_mv_ref(cpi, ZEROMV, rfc); -#if !CONFIG_REF_MV - (void)bsize; - (void)block; -#endif // !CONFIG_REF_MV - if (this_mode == NEARMV) { if (c1 > c3) return 0; } else if (this_mode == NEARESTMV) { @@ -5390,40 +5174,25 @@ static int check_best_zero_mv( } } #if CONFIG_EXT_INTER - else if ((this_mode == NEAREST_NEARESTMV || this_mode == NEAREST_NEARMV || - this_mode == NEAR_NEARESTMV || this_mode == NEAR_NEARMV || + else if ((this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV || this_mode == ZERO_ZEROMV) && frame_mv[this_mode][ref_frames[0]].as_int == zeromv[0].as_int && frame_mv[this_mode][ref_frames[1]].as_int == zeromv[1].as_int) { -#if CONFIG_REF_MV int16_t rfc = compound_mode_context[ref_frames[0]]; -#else - int16_t rfc = mode_context[ref_frames[0]]; -#endif // CONFIG_REF_MV - int c1 = cost_mv_ref(cpi, NEAREST_NEARMV, rfc); int c2 = cost_mv_ref(cpi, NEAREST_NEARESTMV, rfc); int c3 = cost_mv_ref(cpi, ZERO_ZEROMV, rfc); - int c4 = cost_mv_ref(cpi, NEAR_NEARESTMV, rfc); int c5 = cost_mv_ref(cpi, NEAR_NEARMV, rfc); - if (this_mode == NEAREST_NEARMV) { - if (c1 > c3) return 0; - } else if (this_mode == NEAREST_NEARESTMV) { + if (this_mode == NEAREST_NEARESTMV) { if (c2 > c3) return 0; - } else if (this_mode == NEAR_NEARESTMV) { - if (c4 > c3) return 0; } else if (this_mode == NEAR_NEARMV) { if (c5 > c3) return 0; } else { assert(this_mode == ZERO_ZEROMV); if ((c3 >= c2 && frame_mv[NEAREST_NEARESTMV][ref_frames[0]].as_int == 0 && frame_mv[NEAREST_NEARESTMV][ref_frames[1]].as_int == 0) || - (c3 >= c1 && frame_mv[NEAREST_NEARMV][ref_frames[0]].as_int == 0 && - frame_mv[NEAREST_NEARMV][ref_frames[1]].as_int == 0) || (c3 >= c5 && frame_mv[NEAR_NEARMV][ref_frames[0]].as_int == 0 && - frame_mv[NEAR_NEARMV][ref_frames[1]].as_int == 0) || - (c3 >= c4 && frame_mv[NEAR_NEARESTMV][ref_frames[0]].as_int == 0 && - frame_mv[NEAR_NEARESTMV][ref_frames[1]].as_int == 0)) + frame_mv[NEAR_NEARMV][ref_frames[1]].as_int == 0)) return 0; } } @@ -5435,7 +5204,8 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int_mv *frame_mv, int mi_row, int mi_col, #if CONFIG_EXT_INTER - int_mv *ref_mv_sub8x8[2], + int_mv *ref_mv_sub8x8[2], const uint8_t *mask, + int mask_stride, #endif // CONFIG_EXT_INTER int *rate_mv, const int block) { const AV1_COMMON *const cm = &cpi->common; @@ -5596,17 +5366,26 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, best_mv->col >>= 3; best_mv->row >>= 3; -#if CONFIG_REF_MV av1_set_mvcost(x, refs[id], id, 
mbmi->ref_mv_idx); -#endif // CONFIG_REF_MV // Small-range full-pixel motion search. bestsme = av1_refining_search_8p_c(x, sadpb, search_range, &cpi->fn_ptr[bsize], +#if CONFIG_EXT_INTER + mask, mask_stride, id, +#endif &ref_mv[id].as_mv, second_pred); - if (bestsme < INT_MAX) - bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv[id].as_mv, - second_pred, &cpi->fn_ptr[bsize], 1); + if (bestsme < INT_MAX) { +#if CONFIG_EXT_INTER + if (mask) + bestsme = av1_get_mvpred_mask_var(x, best_mv, &ref_mv[id].as_mv, + second_pred, mask, mask_stride, id, + &cpi->fn_ptr[bsize], 1); + else +#endif + bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv[id].as_mv, + second_pred, &cpi->fn_ptr[bsize], 1); + } x->mv_limits = tmp_mv_limits; @@ -5639,7 +5418,11 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost, - &dis, &sse, second_pred, pw, ph, 1); + &dis, &sse, second_pred, +#if CONFIG_EXT_INTER + mask, mask_stride, id, +#endif + pw, ph, 1); // Restore the reference frames. pd->pre[0] = backup_pred; @@ -5649,7 +5432,11 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, x, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost, - &dis, &sse, second_pred, pw, ph, 0); + &dis, &sse, second_pred, +#if CONFIG_EXT_INTER + mask, mask_stride, id, +#endif + pw, ph, 0); } } @@ -5673,9 +5460,7 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[ref] = backup_yv12[ref][i]; } -#if CONFIG_REF_MV av1_set_mvcost(x, refs[ref], ref, mbmi->ref_mv_idx); -#endif // CONFIG_REF_MV #if CONFIG_EXT_INTER && !CONFIG_CB4X4 if (bsize >= BLOCK_8X8) #endif // CONFIG_EXT_INTER && !CONFIG_CB4X4 @@ -5691,947 +5476,6 @@ static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, } } -#if CONFIG_REF_MV && !CONFIG_EXT_INTER -static void update_mv_search_and_seg_mvs( - int *const run_mv_search, int_mv *const seg_mvs, int has_second_rf, - const MV_REFERENCE_FRAME *const ref_frame, - const SEG_RDSTAT *const ref_rdstat, int_mv *const bsi_ref_mv[2]) { - if (has_second_rf) { - if (seg_mvs[ref_frame[0]].as_int == ref_rdstat->mvs[0].as_int && - ref_rdstat->mvs[0].as_int != INVALID_MV) - if (bsi_ref_mv[0]->as_int == ref_rdstat->pred_mv[0].as_int) - --*run_mv_search; - - if (seg_mvs[ref_frame[1]].as_int == ref_rdstat->mvs[1].as_int && - ref_rdstat->mvs[1].as_int != INVALID_MV) - if (bsi_ref_mv[1]->as_int == ref_rdstat->pred_mv[1].as_int) - --*run_mv_search; - } else { - if (bsi_ref_mv[0]->as_int == ref_rdstat->pred_mv[0].as_int && - ref_rdstat->mvs[0].as_int != INVALID_MV) { - *run_mv_search = 0; - seg_mvs[ref_frame[0]].as_int = ref_rdstat->mvs[0].as_int; - } - } -} -#endif // CONFIG_REF_MV && !CONFIG_EXT_INTER - -static int64_t rd_pick_inter_best_sub8x8_mode( - const AV1_COMP *const cpi, MACROBLOCK *x, int_mv *best_ref_mv, - int_mv *second_best_ref_mv, int64_t best_rd, int *returntotrate, - int *returnyrate, int64_t *returndistortion, int *skippable, int64_t *psse, - int mvthresh, int_mv seg_mvs[4][TOTAL_REFS_PER_FRAME], -#if CONFIG_EXT_INTER - int_mv compound_seg_newmvs[4][2], -#endif // CONFIG_EXT_INTER - BEST_SEG_INFO *bsi_buf, int filter_idx, int mi_row, int mi_col) { - BEST_SEG_INFO *bsi = bsi_buf + filter_idx; -#if CONFIG_REF_MV - int_mv tmp_ref_mv[2]; -#endif // 
CONFIG_REF_MV - MACROBLOCKD *xd = &x->e_mbd; - MODE_INFO *mi = xd->mi[0]; - MB_MODE_INFO *mbmi = &mi->mbmi; - int mode_idx; - int k, br = 0, idx, idy; - int64_t bd = 0, block_sse = 0; - PREDICTION_MODE this_mode; - const AV1_COMMON *cm = &cpi->common; - struct macroblock_plane *const p = &x->plane[0]; - struct macroblockd_plane *const pd = &xd->plane[0]; - const int label_count = 4; - int64_t this_segment_rd = 0; - int label_mv_thresh; - int segmentyrate = 0; - const BLOCK_SIZE bsize = mbmi->sb_type; - const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; - const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; -#if CONFIG_CB4X4 - ENTROPY_CONTEXT t_above[4], t_left[4]; -#else - ENTROPY_CONTEXT t_above[2], t_left[2]; -#endif // CONFIG_CB4X4 - int subpelmv = 1, have_ref = 0; - const int has_second_rf = has_second_ref(mbmi); - const int inter_mode_mask = cpi->sf.inter_mode_mask[bsize]; - MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; -#if CONFIG_PVQ - od_rollback_buffer pre_buf; - - od_encode_checkpoint(&x->daala_enc, &pre_buf); -#endif // CONFIG_PVQ -#if CONFIG_EXT_TX && CONFIG_RECT_TX - mbmi->tx_size = - xd->lossless[mbmi->segment_id] ? TX_4X4 : max_txsize_rect_lookup[bsize]; -#else - mbmi->tx_size = TX_4X4; -#endif // CONFIG_EXT_TX && CONFIG_RECT_TX - - av1_zero(*bsi); - - bsi->segment_rd = best_rd; - bsi->ref_mv[0] = best_ref_mv; - bsi->ref_mv[1] = second_best_ref_mv; - bsi->mvp.as_int = best_ref_mv->as_int; - bsi->mvthresh = mvthresh; - - for (idx = 0; idx < 4; ++idx) bsi->modes[idx] = ZEROMV; - -#if CONFIG_REF_MV - for (idx = 0; idx < 4; ++idx) { - for (k = NEARESTMV; k <= NEWMV; ++k) { - bsi->rdstat[idx][INTER_OFFSET(k)].pred_mv[0].as_int = INVALID_MV; - bsi->rdstat[idx][INTER_OFFSET(k)].pred_mv[1].as_int = INVALID_MV; - - bsi->rdstat[idx][INTER_OFFSET(k)].mvs[0].as_int = INVALID_MV; - bsi->rdstat[idx][INTER_OFFSET(k)].mvs[1].as_int = INVALID_MV; - } - } -#endif // CONFIG_REF_MV - - memcpy(t_above, pd->above_context, sizeof(t_above)); - memcpy(t_left, pd->left_context, sizeof(t_left)); - - // 64 makes this threshold really big effectively - // making it so that we very rarely check mvs on - // segments. setting this to 1 would make mv thresh - // roughly equal to what it is for macroblocks - label_mv_thresh = 1 * bsi->mvthresh / label_count; - - // Segmentation method overheads - for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { - for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { - // TODO(jingning,rbultje): rewrite the rate-distortion optimization - // loop for 4x4/4x8/8x4 block coding. 
to be replaced with new rd loop - int_mv mode_mv[MB_MODE_COUNT][2]; - int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME]; - PREDICTION_MODE mode_selected = ZEROMV; - int64_t new_best_rd = INT64_MAX; - const int index = idy * 2 + idx; - int ref; -#if CONFIG_REF_MV - CANDIDATE_MV ref_mv_stack[2][MAX_REF_MV_STACK_SIZE]; - uint8_t ref_mv_count[2]; -#endif // CONFIG_REF_MV -#if CONFIG_EXT_INTER - int_mv ref_mvs_sub8x8[2][2]; -#endif // CONFIG_EXT_INTER -#if CONFIG_PVQ - od_rollback_buffer idx_buf, post_buf; - od_encode_checkpoint(&x->daala_enc, &idx_buf); - od_encode_checkpoint(&x->daala_enc, &post_buf); -#endif // CONFIG_PVQ - - for (ref = 0; ref < 1 + has_second_rf; ++ref) { - const MV_REFERENCE_FRAME frame = mbmi->ref_frame[ref]; -#if CONFIG_EXT_INTER - int_mv mv_ref_list[MAX_MV_REF_CANDIDATES]; - av1_update_mv_context(cm, xd, mi, frame, mv_ref_list, index, mi_row, - mi_col, NULL); -#endif // CONFIG_EXT_INTER -#if CONFIG_GLOBAL_MOTION - frame_mv[ZEROMV][frame].as_int = - gm_get_motion_vector(&cm->global_motion[frame], - cm->allow_high_precision_mv, mbmi->sb_type, - mi_col, mi_row, index) - .as_int; -#else // CONFIG_GLOBAL_MOTION - frame_mv[ZEROMV][frame].as_int = 0; -#endif // CONFIG_GLOBAL_MOTION - av1_append_sub8x8_mvs_for_idx(cm, xd, index, ref, mi_row, mi_col, -#if CONFIG_REF_MV - ref_mv_stack[ref], &ref_mv_count[ref], -#endif // CONFIG_REF_MV -#if CONFIG_EXT_INTER - mv_ref_list, -#endif // CONFIG_EXT_INTER - &frame_mv[NEARESTMV][frame], - &frame_mv[NEARMV][frame]); - -#if CONFIG_REF_MV - tmp_ref_mv[ref] = frame_mv[NEARESTMV][mbmi->ref_frame[ref]]; - lower_mv_precision(&tmp_ref_mv[ref].as_mv, cm->allow_high_precision_mv); - bsi->ref_mv[ref] = &tmp_ref_mv[ref]; - mbmi_ext->ref_mvs[frame][0] = tmp_ref_mv[ref]; -#endif // CONFIG_REF_MV - -#if CONFIG_EXT_INTER - mv_ref_list[0].as_int = frame_mv[NEARESTMV][frame].as_int; - mv_ref_list[1].as_int = frame_mv[NEARMV][frame].as_int; - av1_find_best_ref_mvs(cm->allow_high_precision_mv, mv_ref_list, - &ref_mvs_sub8x8[0][ref], &ref_mvs_sub8x8[1][ref]); - - if (has_second_rf) { -#if CONFIG_GLOBAL_MOTION - frame_mv[ZERO_ZEROMV][frame].as_int = - gm_get_motion_vector(&cm->global_motion[frame], - cm->allow_high_precision_mv, mbmi->sb_type, - mi_col, mi_row, index) - .as_int; -#else - frame_mv[ZERO_ZEROMV][frame].as_int = 0; -#endif // CONFIG_GLOBAL_MOTION - frame_mv[NEAREST_NEARESTMV][frame].as_int = - frame_mv[NEARESTMV][frame].as_int; - - if (ref == 0) { - frame_mv[NEAREST_NEARMV][frame].as_int = - frame_mv[NEARESTMV][frame].as_int; - frame_mv[NEAR_NEARESTMV][frame].as_int = - frame_mv[NEARMV][frame].as_int; - frame_mv[NEAREST_NEWMV][frame].as_int = - frame_mv[NEARESTMV][frame].as_int; - frame_mv[NEAR_NEWMV][frame].as_int = frame_mv[NEARMV][frame].as_int; - frame_mv[NEAR_NEARMV][frame].as_int = - frame_mv[NEARMV][frame].as_int; - } else if (ref == 1) { - frame_mv[NEAREST_NEARMV][frame].as_int = - frame_mv[NEARMV][frame].as_int; - frame_mv[NEAR_NEARESTMV][frame].as_int = - frame_mv[NEARESTMV][frame].as_int; - frame_mv[NEW_NEARESTMV][frame].as_int = - frame_mv[NEARESTMV][frame].as_int; - frame_mv[NEW_NEARMV][frame].as_int = frame_mv[NEARMV][frame].as_int; - frame_mv[NEAR_NEARMV][frame].as_int = - frame_mv[NEARMV][frame].as_int; - } - } -#endif // CONFIG_EXT_INTER - } - -// search for the best motion vector on this segment -#if CONFIG_EXT_INTER - for (this_mode = (has_second_rf ? NEAREST_NEARESTMV : NEARESTMV); - this_mode <= (has_second_rf ? 
NEW_NEWMV : NEWMV); ++this_mode) -#else - for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) -#endif // CONFIG_EXT_INTER - { - const struct buf_2d orig_src = x->plane[0].src; - struct buf_2d orig_pre[2]; - // This flag controls if the motion estimation will kick off. When it - // is set to a non-zero value, the encoder will force motion estimation. - int run_mv_search = 0; - - mode_idx = INTER_OFFSET(this_mode); -#if CONFIG_EXT_INTER - for (ref = 0; ref < 1 + has_second_rf; ++ref) - bsi->ref_mv[ref]->as_int = ref_mvs_sub8x8[0][ref].as_int; -#endif // CONFIG_EXT_INTER - bsi->rdstat[index][mode_idx].brdcost = INT64_MAX; - if (!(inter_mode_mask & (1 << this_mode))) continue; - -#if CONFIG_REF_MV - run_mv_search = 2; -#if !CONFIG_EXT_INTER - if (filter_idx > 0 && this_mode == NEWMV) { - const BEST_SEG_INFO *ref_bsi = bsi_buf; - const SEG_RDSTAT *ref_rdstat = &ref_bsi->rdstat[index][mode_idx]; - - update_mv_search_and_seg_mvs(&run_mv_search, seg_mvs[index], - has_second_rf, mbmi->ref_frame, - ref_rdstat, bsi->ref_mv); - - if (run_mv_search != 0 && filter_idx > 1) { - ref_bsi = bsi_buf + 1; - ref_rdstat = &ref_bsi->rdstat[index][mode_idx]; - run_mv_search = 2; - update_mv_search_and_seg_mvs(&run_mv_search, seg_mvs[index], - has_second_rf, mbmi->ref_frame, - ref_rdstat, bsi->ref_mv); - } - } -#endif // !CONFIG_EXT_INTER -#endif // CONFIG_REF_MV - -#if CONFIG_GLOBAL_MOTION - if (cm->global_motion[mbmi->ref_frame[0]].wmtype == IDENTITY && - (!has_second_rf || - cm->global_motion[mbmi->ref_frame[1]].wmtype == IDENTITY)) -#endif // CONFIG_GLOBAL_MOTION - - if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, -#if CONFIG_REF_MV && CONFIG_EXT_INTER - mbmi_ext->compound_mode_context, -#endif // CONFIG_REF_MV && CONFIG_EXT_INTER - frame_mv, this_mode, mbmi->ref_frame, bsize, - index, mi_row, mi_col)) - continue; - - memcpy(orig_pre, pd->pre, sizeof(orig_pre)); - memcpy(bsi->rdstat[index][mode_idx].ta, t_above, - sizeof(bsi->rdstat[index][mode_idx].ta)); - memcpy(bsi->rdstat[index][mode_idx].tl, t_left, - sizeof(bsi->rdstat[index][mode_idx].tl)); -#if CONFIG_PVQ - od_encode_rollback(&x->daala_enc, &idx_buf); -#endif // CONFIG_PVQ - - // motion search for newmv (single predictor case only) - if (!has_second_rf && -#if CONFIG_EXT_INTER - have_newmv_in_inter_mode(this_mode) && - (seg_mvs[index][mbmi->ref_frame[0]].as_int == INVALID_MV) -#else - this_mode == NEWMV && - (seg_mvs[index][mbmi->ref_frame[0]].as_int == INVALID_MV || - run_mv_search) -#endif // CONFIG_EXT_INTER - ) { - int step_param = 0; - int bestsme = INT_MAX; - int sadpb = x->sadperbit4; - MV mvp_full; - int max_mv; - int cost_list[5]; - MvLimits tmp_mv_limits = x->mv_limits; - - /* Is the best so far sufficiently good that we cant justify doing - * and new motion search. */ - if (new_best_rd < label_mv_thresh) break; - -#if CONFIG_EXT_INTER - bsi->mvp.as_int = bsi->ref_mv[0]->as_int; -#else -// use previous block's result as next block's MV predictor. -#if !CONFIG_REF_MV - if (index > 0) { - bsi->mvp.as_int = mi->bmi[index - 1].as_mv[0].as_int; - if (index == 2) - bsi->mvp.as_int = mi->bmi[index - 2].as_mv[0].as_int; - } -#endif // !CONFIG_REF_MV -#endif // CONFIG_EXT_INTER - max_mv = (index == 0) ? (int)x->max_mv_context[mbmi->ref_frame[0]] - : AOMMAX(abs(bsi->mvp.as_mv.row), - abs(bsi->mvp.as_mv.col)) >> - 3; - - if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) { - // Take wtd average of the step_params based on the last frame's - // max mv magnitude and the best ref mvs of the current block for - // the given reference. 
- step_param = - (av1_init_search_range(max_mv) + cpi->mv_step_param) / 2; - } else { - step_param = cpi->mv_step_param; - } - -#if CONFIG_REF_MV - mvp_full.row = bsi->ref_mv[0]->as_mv.row >> 3; - mvp_full.col = bsi->ref_mv[0]->as_mv.col >> 3; -#else - mvp_full.row = bsi->mvp.as_mv.row >> 3; - mvp_full.col = bsi->mvp.as_mv.col >> 3; -#endif // CONFIG_REF_MV - - if (cpi->sf.adaptive_motion_search) { - mvp_full.row = x->pred_mv[mbmi->ref_frame[0]].row >> 3; - mvp_full.col = x->pred_mv[mbmi->ref_frame[0]].col >> 3; - step_param = AOMMAX(step_param, 8); - } - - // adjust src pointer for this block - mi_buf_shift(x, index); - - av1_set_mv_search_range(&x->mv_limits, &bsi->ref_mv[0]->as_mv); - - x->best_mv.as_int = x->second_best_mv.as_int = INVALID_MV; - -#if CONFIG_REF_MV - av1_set_mvcost(x, mbmi->ref_frame[0], 0, mbmi->ref_mv_idx); -#endif // CONFIG_REF_MV - bestsme = av1_full_pixel_search( - cpi, x, bsize, &mvp_full, step_param, sadpb, - cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL, - &bsi->ref_mv[0]->as_mv, INT_MAX, 1); - - x->mv_limits = tmp_mv_limits; - - if (bestsme < INT_MAX) { - int distortion; - if (cpi->sf.use_upsampled_references) { - int best_mv_var; - const int try_second = - x->second_best_mv.as_int != INVALID_MV && - x->second_best_mv.as_int != x->best_mv.as_int; - const int pw = block_size_wide[bsize]; - const int ph = block_size_high[bsize]; - // Use up-sampled reference frames. - struct buf_2d backup_pred = pd->pre[0]; - const YV12_BUFFER_CONFIG *upsampled_ref = - get_upsampled_ref(cpi, mbmi->ref_frame[0]); - - // Set pred for Y plane - setup_pred_plane( - &pd->pre[0], bsize, upsampled_ref->y_buffer, - upsampled_ref->y_crop_width, upsampled_ref->y_crop_height, - upsampled_ref->y_stride, (mi_row << 3), (mi_col << 3), NULL, - pd->subsampling_x, pd->subsampling_y); - - // adjust pred pointer for this block - pd->pre[0].buf = - &pd->pre[0].buf[(av1_raster_block_offset(BLOCK_8X8, index, - pd->pre[0].stride)) - << 3]; - - best_mv_var = cpi->find_fractional_mv_step( - x, &bsi->ref_mv[0]->as_mv, cm->allow_high_precision_mv, - x->errorperbit, &cpi->fn_ptr[bsize], - cpi->sf.mv.subpel_force_stop, - cpi->sf.mv.subpel_iters_per_step, - cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost, - &distortion, &x->pred_sse[mbmi->ref_frame[0]], NULL, pw, ph, - 1); - - if (try_second) { - int this_var; - MV best_mv = x->best_mv.as_mv; - const MV ref_mv = bsi->ref_mv[0]->as_mv; - const int minc = - AOMMAX(x->mv_limits.col_min * 8, ref_mv.col - MV_MAX); - const int maxc = - AOMMIN(x->mv_limits.col_max * 8, ref_mv.col + MV_MAX); - const int minr = - AOMMAX(x->mv_limits.row_min * 8, ref_mv.row - MV_MAX); - const int maxr = - AOMMIN(x->mv_limits.row_max * 8, ref_mv.row + MV_MAX); - - x->best_mv = x->second_best_mv; - if (x->best_mv.as_mv.row * 8 <= maxr && - x->best_mv.as_mv.row * 8 >= minr && - x->best_mv.as_mv.col * 8 <= maxc && - x->best_mv.as_mv.col * 8 >= minc) { - this_var = cpi->find_fractional_mv_step( - x, &bsi->ref_mv[0]->as_mv, cm->allow_high_precision_mv, - x->errorperbit, &cpi->fn_ptr[bsize], - cpi->sf.mv.subpel_force_stop, - cpi->sf.mv.subpel_iters_per_step, - cond_cost_list(cpi, cost_list), x->nmvjointcost, - x->mvcost, &distortion, &x->pred_sse[mbmi->ref_frame[0]], - NULL, pw, ph, 1); - if (this_var < best_mv_var) best_mv = x->best_mv.as_mv; - x->best_mv.as_mv = best_mv; - } - } - - // Restore the reference frames. 
- pd->pre[0] = backup_pred; - } else { - cpi->find_fractional_mv_step( - x, &bsi->ref_mv[0]->as_mv, cm->allow_high_precision_mv, - x->errorperbit, &cpi->fn_ptr[bsize], - cpi->sf.mv.subpel_force_stop, - cpi->sf.mv.subpel_iters_per_step, - cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost, - &distortion, &x->pred_sse[mbmi->ref_frame[0]], NULL, 0, 0, 0); - } - -// save motion search result for use in compound prediction -#if CONFIG_EXT_INTER - seg_mvs[index][mbmi->ref_frame[0]].as_mv = x->best_mv.as_mv; -#else - seg_mvs[index][mbmi->ref_frame[0]].as_mv = x->best_mv.as_mv; -#endif // CONFIG_EXT_INTER - } - - if (cpi->sf.adaptive_motion_search) - x->pred_mv[mbmi->ref_frame[0]] = x->best_mv.as_mv; - -#if CONFIG_EXT_INTER - mode_mv[this_mode][0] = x->best_mv; -#else - mode_mv[NEWMV][0] = x->best_mv; -#endif // CONFIG_EXT_INTER - - // restore src pointers - mi_buf_restore(x, orig_src, orig_pre); - } - - if (has_second_rf) { -#if CONFIG_EXT_INTER - if (seg_mvs[index][mbmi->ref_frame[1]].as_int == INVALID_MV || - seg_mvs[index][mbmi->ref_frame[0]].as_int == INVALID_MV) -#else - if (seg_mvs[index][mbmi->ref_frame[1]].as_int == INVALID_MV || - seg_mvs[index][mbmi->ref_frame[0]].as_int == INVALID_MV) -#endif // CONFIG_EXT_INTER - continue; - } - -#if CONFIG_DUAL_FILTER - (void)run_mv_search; -#endif // CONFIG_DUAL_FILTER - - if (has_second_rf && -#if CONFIG_EXT_INTER - this_mode == NEW_NEWMV && -#else - this_mode == NEWMV && -#endif // CONFIG_EXT_INTER -#if CONFIG_DUAL_FILTER - (mbmi->interp_filter[0] == EIGHTTAP_REGULAR || run_mv_search)) -#else - (mbmi->interp_filter == EIGHTTAP_REGULAR || run_mv_search)) -#endif // CONFIG_DUAL_FILTER - { - // adjust src pointers - mi_buf_shift(x, index); - if (cpi->sf.comp_inter_joint_search_thresh <= bsize) { - int rate_mv; - frame_mv[this_mode][mbmi->ref_frame[0]].as_int = - seg_mvs[index][mbmi->ref_frame[0]].as_int; - frame_mv[this_mode][mbmi->ref_frame[1]].as_int = - seg_mvs[index][mbmi->ref_frame[1]].as_int; - joint_motion_search(cpi, x, bsize, frame_mv[this_mode], mi_row, - mi_col, -#if CONFIG_EXT_INTER - bsi->ref_mv, -#endif // CONFIG_EXT_INTER - &rate_mv, index); -#if CONFIG_EXT_INTER - compound_seg_newmvs[index][0].as_int = - frame_mv[this_mode][mbmi->ref_frame[0]].as_int; - compound_seg_newmvs[index][1].as_int = - frame_mv[this_mode][mbmi->ref_frame[1]].as_int; -#else - seg_mvs[index][mbmi->ref_frame[0]].as_int = - frame_mv[this_mode][mbmi->ref_frame[0]].as_int; - seg_mvs[index][mbmi->ref_frame[1]].as_int = - frame_mv[this_mode][mbmi->ref_frame[1]].as_int; -#endif // CONFIG_EXT_INTER - } - // restore src pointers - mi_buf_restore(x, orig_src, orig_pre); - } - - bsi->rdstat[index][mode_idx].brate = set_and_cost_bmi_mvs( - cpi, x, xd, index, this_mode, mode_mv[this_mode], frame_mv, - seg_mvs[index], -#if CONFIG_EXT_INTER - compound_seg_newmvs[index], -#endif // CONFIG_EXT_INTER - bsi->ref_mv, x->nmvjointcost, x->mvcost, mi_row, mi_col); - - for (ref = 0; ref < 1 + has_second_rf; ++ref) { - bsi->rdstat[index][mode_idx].mvs[ref].as_int = - mode_mv[this_mode][ref].as_int; - if (num_4x4_blocks_wide > 1) - bsi->rdstat[index + 1][mode_idx].mvs[ref].as_int = - mode_mv[this_mode][ref].as_int; - if (num_4x4_blocks_high > 1) - bsi->rdstat[index + 2][mode_idx].mvs[ref].as_int = - mode_mv[this_mode][ref].as_int; -#if CONFIG_REF_MV - bsi->rdstat[index][mode_idx].pred_mv[ref].as_int = - mi->bmi[index].pred_mv[ref].as_int; - if (num_4x4_blocks_wide > 1) - bsi->rdstat[index + 1][mode_idx].pred_mv[ref].as_int = - mi->bmi[index].pred_mv[ref].as_int; - if 
(num_4x4_blocks_high > 1) - bsi->rdstat[index + 2][mode_idx].pred_mv[ref].as_int = - mi->bmi[index].pred_mv[ref].as_int; -#endif // CONFIG_REF_MV -#if CONFIG_EXT_INTER - bsi->rdstat[index][mode_idx].ref_mv[ref].as_int = - bsi->ref_mv[ref]->as_int; - if (num_4x4_blocks_wide > 1) - bsi->rdstat[index + 1][mode_idx].ref_mv[ref].as_int = - bsi->ref_mv[ref]->as_int; - if (num_4x4_blocks_high > 1) - bsi->rdstat[index + 2][mode_idx].ref_mv[ref].as_int = - bsi->ref_mv[ref]->as_int; -#endif // CONFIG_EXT_INTER - } - - // Trap vectors that reach beyond the UMV borders - if (mv_check_bounds(&x->mv_limits, &mode_mv[this_mode][0].as_mv) || - (has_second_rf && - mv_check_bounds(&x->mv_limits, &mode_mv[this_mode][1].as_mv))) - continue; - - if (filter_idx > 0) { - BEST_SEG_INFO *ref_bsi = bsi_buf; - subpelmv = 0; - have_ref = 1; - - for (ref = 0; ref < 1 + has_second_rf; ++ref) { - subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv); -#if CONFIG_EXT_INTER - if (have_newmv_in_inter_mode(this_mode)) - have_ref &= - ((mode_mv[this_mode][ref].as_int == - ref_bsi->rdstat[index][mode_idx].mvs[ref].as_int) && - (bsi->ref_mv[ref]->as_int == - ref_bsi->rdstat[index][mode_idx].ref_mv[ref].as_int)); - else -#endif // CONFIG_EXT_INTER - have_ref &= mode_mv[this_mode][ref].as_int == - ref_bsi->rdstat[index][mode_idx].mvs[ref].as_int; - } - - have_ref &= ref_bsi->rdstat[index][mode_idx].brate > 0; - - if (filter_idx > 1 && !subpelmv && !have_ref) { - ref_bsi = bsi_buf + 1; - have_ref = 1; - for (ref = 0; ref < 1 + has_second_rf; ++ref) -#if CONFIG_EXT_INTER - if (have_newmv_in_inter_mode(this_mode)) - have_ref &= - ((mode_mv[this_mode][ref].as_int == - ref_bsi->rdstat[index][mode_idx].mvs[ref].as_int) && - (bsi->ref_mv[ref]->as_int == - ref_bsi->rdstat[index][mode_idx].ref_mv[ref].as_int)); - else -#endif // CONFIG_EXT_INTER - have_ref &= mode_mv[this_mode][ref].as_int == - ref_bsi->rdstat[index][mode_idx].mvs[ref].as_int; - - have_ref &= ref_bsi->rdstat[index][mode_idx].brate > 0; - } - - if (!subpelmv && have_ref && - ref_bsi->rdstat[index][mode_idx].brdcost < INT64_MAX) { -#if CONFIG_REF_MV - bsi->rdstat[index][mode_idx].byrate = - ref_bsi->rdstat[index][mode_idx].byrate; - bsi->rdstat[index][mode_idx].bdist = - ref_bsi->rdstat[index][mode_idx].bdist; - bsi->rdstat[index][mode_idx].bsse = - ref_bsi->rdstat[index][mode_idx].bsse; - bsi->rdstat[index][mode_idx].brate += - ref_bsi->rdstat[index][mode_idx].byrate; - bsi->rdstat[index][mode_idx].eobs = - ref_bsi->rdstat[index][mode_idx].eobs; - - bsi->rdstat[index][mode_idx].brdcost = - RDCOST(x->rdmult, x->rddiv, bsi->rdstat[index][mode_idx].brate, - bsi->rdstat[index][mode_idx].bdist); - - memcpy(bsi->rdstat[index][mode_idx].ta, - ref_bsi->rdstat[index][mode_idx].ta, - sizeof(bsi->rdstat[index][mode_idx].ta)); - memcpy(bsi->rdstat[index][mode_idx].tl, - ref_bsi->rdstat[index][mode_idx].tl, - sizeof(bsi->rdstat[index][mode_idx].tl)); -#else - memcpy(&bsi->rdstat[index][mode_idx], - &ref_bsi->rdstat[index][mode_idx], sizeof(SEG_RDSTAT)); -#endif // CONFIG_REF_MV - if (num_4x4_blocks_wide > 1) - bsi->rdstat[index + 1][mode_idx].eobs = - ref_bsi->rdstat[index + 1][mode_idx].eobs; - if (num_4x4_blocks_high > 1) - bsi->rdstat[index + 2][mode_idx].eobs = - ref_bsi->rdstat[index + 2][mode_idx].eobs; - - if (bsi->rdstat[index][mode_idx].brdcost < new_best_rd) { -#if CONFIG_REF_MV - // If the NEWMV mode is using the same motion vector as the - // NEARESTMV mode, skip the rest rate-distortion calculations - // and use the inferred motion vector modes. 
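/* Rationale (sketch): if the searched NEWMV lands exactly on the
 * NEARESTMV predictor, the same prediction is reachable through the
 * inferred mode for fewer bits (no explicit MV to code), so finishing
 * the NEWMV rate-distortion work cannot win. The check below compares
 * the candidate MV(s) against bsi->ref_mv[] and, on a match for every
 * active reference, skips ahead, e.g. in the single-reference case:
 *   if (rdstat->mvs[0].as_int == bsi->ref_mv[0]->as_int) continue;
 * where rdstat abbreviates &bsi->rdstat[index][mode_idx]. */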
- if (this_mode == NEWMV) { - if (has_second_rf) { - if (bsi->rdstat[index][mode_idx].mvs[0].as_int == - bsi->ref_mv[0]->as_int && - bsi->rdstat[index][mode_idx].mvs[1].as_int == - bsi->ref_mv[1]->as_int) - continue; - } else { - if (bsi->rdstat[index][mode_idx].mvs[0].as_int == - bsi->ref_mv[0]->as_int) - continue; - } - } -#endif // CONFIG_REF_MV - mode_selected = this_mode; - new_best_rd = bsi->rdstat[index][mode_idx].brdcost; -#if CONFIG_PVQ - od_encode_checkpoint(&x->daala_enc, &post_buf); -#endif // CONFIG_PVQ - } - continue; - } - } - - bsi->rdstat[index][mode_idx].brdcost = encode_inter_mb_segment_sub8x8( - cpi, x, bsi->segment_rd - this_segment_rd, index, - &bsi->rdstat[index][mode_idx].byrate, - &bsi->rdstat[index][mode_idx].bdist, - &bsi->rdstat[index][mode_idx].bsse, bsi->rdstat[index][mode_idx].ta, - bsi->rdstat[index][mode_idx].tl, idy, idx, mi_row, mi_col); - - if (bsi->rdstat[index][mode_idx].brdcost < INT64_MAX) { - bsi->rdstat[index][mode_idx].brdcost += RDCOST( - x->rdmult, x->rddiv, bsi->rdstat[index][mode_idx].brate, 0); - bsi->rdstat[index][mode_idx].brate += - bsi->rdstat[index][mode_idx].byrate; - bsi->rdstat[index][mode_idx].eobs = p->eobs[index]; - if (num_4x4_blocks_wide > 1) - bsi->rdstat[index + 1][mode_idx].eobs = p->eobs[index + 1]; - if (num_4x4_blocks_high > 1) - bsi->rdstat[index + 2][mode_idx].eobs = p->eobs[index + 2]; - } - - if (bsi->rdstat[index][mode_idx].brdcost < new_best_rd) { -#if CONFIG_REF_MV - // If the NEWMV mode is using the same motion vector as the - // NEARESTMV mode, skip the rest rate-distortion calculations - // and use the inferred motion vector modes. - if (this_mode == NEWMV) { - if (has_second_rf) { - if (bsi->rdstat[index][mode_idx].mvs[0].as_int == - bsi->ref_mv[0]->as_int && - bsi->rdstat[index][mode_idx].mvs[1].as_int == - bsi->ref_mv[1]->as_int) - continue; - } else { - if (bsi->rdstat[index][mode_idx].mvs[0].as_int == - bsi->ref_mv[0]->as_int) - continue; - } - } -#endif // CONFIG_REF_MV - mode_selected = this_mode; - new_best_rd = bsi->rdstat[index][mode_idx].brdcost; - -#if CONFIG_PVQ - od_encode_checkpoint(&x->daala_enc, &post_buf); -#endif // CONFIG_PVQ - } - } /*for each 4x4 mode*/ - - if (new_best_rd == INT64_MAX) { - int iy, midx; - for (iy = index + 1; iy < 4; ++iy) -#if CONFIG_EXT_INTER - for (midx = 0; midx < INTER_MODES + INTER_COMPOUND_MODES; ++midx) -#else - for (midx = 0; midx < INTER_MODES; ++midx) -#endif // CONFIG_EXT_INTER - bsi->rdstat[iy][midx].brdcost = INT64_MAX; - bsi->segment_rd = INT64_MAX; -#if CONFIG_PVQ - od_encode_rollback(&x->daala_enc, &pre_buf); -#endif // CONFIG_PVQ - return INT64_MAX; - } - - mode_idx = INTER_OFFSET(mode_selected); - memcpy(t_above, bsi->rdstat[index][mode_idx].ta, sizeof(t_above)); - memcpy(t_left, bsi->rdstat[index][mode_idx].tl, sizeof(t_left)); -#if CONFIG_PVQ - od_encode_rollback(&x->daala_enc, &post_buf); -#endif // CONFIG_PVQ - -#if CONFIG_EXT_INTER - bsi->ref_mv[0]->as_int = bsi->rdstat[index][mode_idx].ref_mv[0].as_int; - if (has_second_rf) - bsi->ref_mv[1]->as_int = bsi->rdstat[index][mode_idx].ref_mv[1].as_int; -#endif // CONFIG_EXT_INTER - set_and_cost_bmi_mvs(cpi, x, xd, index, mode_selected, - mode_mv[mode_selected], frame_mv, seg_mvs[index], -#if CONFIG_EXT_INTER - compound_seg_newmvs[index], -#endif // CONFIG_EXT_INTER - bsi->ref_mv, x->nmvjointcost, x->mvcost, mi_row, - mi_col); - - br += bsi->rdstat[index][mode_idx].brate; - bd += bsi->rdstat[index][mode_idx].bdist; - block_sse += bsi->rdstat[index][mode_idx].bsse; - segmentyrate += 
bsi->rdstat[index][mode_idx].byrate; - this_segment_rd += bsi->rdstat[index][mode_idx].brdcost; - - if (this_segment_rd > bsi->segment_rd) { - int iy, midx; - for (iy = index + 1; iy < 4; ++iy) -#if CONFIG_EXT_INTER - for (midx = 0; midx < INTER_MODES + INTER_COMPOUND_MODES; ++midx) -#else - for (midx = 0; midx < INTER_MODES; ++midx) -#endif // CONFIG_EXT_INTER - bsi->rdstat[iy][midx].brdcost = INT64_MAX; - bsi->segment_rd = INT64_MAX; -#if CONFIG_PVQ - od_encode_rollback(&x->daala_enc, &pre_buf); -#endif // CONFIG_PVQ - return INT64_MAX; - } - } - } /* for each label */ -#if CONFIG_PVQ - od_encode_rollback(&x->daala_enc, &pre_buf); -#endif // CONFIG_PVQ - - bsi->r = br; - bsi->d = bd; - bsi->segment_yrate = segmentyrate; - bsi->segment_rd = this_segment_rd; - bsi->sse = block_sse; - - // update the coding decisions - for (k = 0; k < 4; ++k) bsi->modes[k] = mi->bmi[k].as_mode; - -#if CONFIG_DAALA_DIST - // Compute prediction (i.e. skip) and decoded distortion by daala-distortion. - { - const int src_stride = p->src.stride; - const int dst_stride = pd->dst.stride; - uint8_t *src = p->src.buf; - uint8_t *dst = pd->dst.buf; - const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->mbmi.sb_type, pd); - const int use_activity_masking = 0; - const int qm = OD_HVS_QM; - const int bsw = block_size_wide[plane_bsize]; - const int bsh = block_size_high[plane_bsize]; - int64_t rd1, rd2; - int64_t daala_sse, daala_dist; - TX_SIZE tx_size = mbmi->tx_size; - -#if CONFIG_HIGHBITDEPTH - uint8_t *recon_8x8; - DECLARE_ALIGNED(16, uint16_t, recon16[8 * 8]); - - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) - recon_8x8 = CONVERT_TO_BYTEPTR(recon16); - else - recon_8x8 = (uint8_t *)recon16; -#else - DECLARE_ALIGNED(16, uint8_t, recon_8x8[8 * 8]); -#endif // CONFIG_HIGHBITDEPTH - -#if CONFIG_PVQ - use_activity_masking = x->daala_enc.use_activity_masking; -#endif // CONFIG_PVQ - - // For each of sub8x8 prediction block in a 8x8 block - for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { - for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { - int i = idy * 2 + idx; - const uint8_t *const src_sub8x8 = - src + av1_raster_block_offset(BLOCK_8X8, i, p->src.stride); - uint8_t *const dst_sub8x8 = - dst + av1_raster_block_offset(BLOCK_8X8, i, pd->dst.stride); - uint8_t *recon_sub8x8 = recon_8x8 + (idy * 8 + idx) * 4; - const int txb_width = max_block_wide(xd, plane_bsize, 0); - const int txb_height = max_block_high(xd, plane_bsize, 0); - int idx_, idy_; - - av1_build_inter_predictor_sub8x8(xd, 0, i, idy, idx, mi_row, mi_col); -#if CONFIG_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - aom_highbd_subtract_block( - height, width, - av1_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8, - src_sub8x8, p->src.stride, dst_sub8x8, pd->dst.stride, xd->bd); - } else { - aom_subtract_block( - height, width, - av1_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8, - src_sub8x8, p->src.stride, dst_sub8x8, pd->dst.stride); - } -#else - aom_subtract_block( - bsh, bsw, av1_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), - 8, src_sub8x8, p->src.stride, dst_sub8x8, pd->dst.stride); -#endif // CONFIG_HIGHBITDEPTH - -#if CONFIG_HIGHBITDEPTH - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - aom_highbd_convolve_copy(dst_sub8x8, dst_stride, recon_sub8x8, 8, - NULL, 0, NULL, 0, bsw, bsh, xd->bd); - } else { -#endif // CONFIG_HIGHBITDEPTH - aom_convolve_copy(dst_sub8x8, dst_stride, recon_sub8x8, 8, NULL, 0, - NULL, 0, bsw, bsh); -#if CONFIG_HIGHBITDEPTH - } -#endif // CONFIG_HIGHBITDEPTH - - // To 
get decoded pixels, do 4x4 xform and quant for each 4x4 block - // in a sub8x8 prediction block. In case remaining parts of - // sub8x8 inter mode rdo assume pd->dst stores predicted pixels, - // use local buffer to store decoded pixels. - for (idy_ = 0; idy_ < txb_height; idy_++) { - for (idx_ = 0; idx_ < txb_width; idx_++) { - int coeff_ctx = 0; - const tran_low_t *dqcoeff; - uint16_t eob; - const PLANE_TYPE plane_type = PLANE_TYPE_Y; - uint8_t *recon_4x4 = recon_sub8x8 + (idy_ * 8 + idx_) * 4; - const int block_raster_idx = (idy + idy_) * 2 + (idx + idx_); - const int block = - av1_raster_order_to_block_index(tx_size, block_raster_idx); - TX_TYPE tx_type = get_tx_type(plane_type, xd, block, tx_size); - - dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); - av1_xform_quant(cm, x, 0, block, idy + idy_, idx + idx_, BLOCK_8X8, - tx_size, coeff_ctx, AV1_XFORM_QUANT_FP); - if (xd->lossless[xd->mi[0]->mbmi.segment_id] == 0) - av1_optimize_b(cm, x, 0, block, tx_size, coeff_ctx); - - eob = p->eobs[block]; - av1_inverse_transform_block(xd, dqcoeff, tx_type, tx_size, - recon_4x4, 8, eob); - } - } - } - } - // Compute daala-distortion for a 8x8 block - daala_sse = av1_daala_dist(src, src_stride, pd->dst.buf, dst_stride, 8, 8, - qm, use_activity_masking, x->qindex) - << 4; - - daala_dist = av1_daala_dist(src, src_stride, recon_8x8, 8, 8, 8, qm, - use_activity_masking, x->qindex) - << 4; - - bsi->sse = daala_sse; - bsi->d = daala_dist; - - rd1 = RDCOST(x->rdmult, x->rddiv, bsi->r, bsi->d); - rd2 = RDCOST(x->rdmult, x->rddiv, 0, bsi->sse); - bsi->segment_rd = AOMMIN(rd1, rd2); - } -#endif // CONFIG_DAALA_DIST - - if (bsi->segment_rd > best_rd) return INT64_MAX; - /* set it to the best */ - for (idx = 0; idx < 4; idx++) { - mode_idx = INTER_OFFSET(bsi->modes[idx]); - mi->bmi[idx].as_mv[0].as_int = bsi->rdstat[idx][mode_idx].mvs[0].as_int; - if (has_second_ref(mbmi)) - mi->bmi[idx].as_mv[1].as_int = bsi->rdstat[idx][mode_idx].mvs[1].as_int; -#if CONFIG_REF_MV - mi->bmi[idx].pred_mv[0] = bsi->rdstat[idx][mode_idx].pred_mv[0]; - if (has_second_ref(mbmi)) - mi->bmi[idx].pred_mv[1] = bsi->rdstat[idx][mode_idx].pred_mv[1]; -#endif // CONFIG_REF_MV -#if CONFIG_EXT_INTER - mi->bmi[idx].ref_mv[0].as_int = bsi->rdstat[idx][mode_idx].ref_mv[0].as_int; - if (has_second_rf) - mi->bmi[idx].ref_mv[1].as_int = - bsi->rdstat[idx][mode_idx].ref_mv[1].as_int; -#endif // CONFIG_EXT_INTER - x->plane[0].eobs[idx] = bsi->rdstat[idx][mode_idx].eobs; - mi->bmi[idx].as_mode = bsi->modes[idx]; - } - - /* - * used to set mbmi->mv.as_int - */ - *returntotrate = bsi->r; - *returndistortion = bsi->d; - *returnyrate = bsi->segment_yrate; - *skippable = av1_is_skippable_in_plane(x, BLOCK_8X8, 0); - *psse = bsi->sse; - mbmi->mode = bsi->modes[3]; - - return bsi->segment_rd; -} - static void estimate_ref_frame_costs(const AV1_COMMON *cm, const MACROBLOCKD *xd, int segment_id, unsigned int *ref_costs_single, @@ -6808,15 +5652,13 @@ static void setup_buffer_inter( av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf); // Gets an initial list of candidate vectors from neighbours and orders them - av1_find_mv_refs( - cm, xd, mi, ref_frame, -#if CONFIG_REF_MV - &mbmi_ext->ref_mv_count[ref_frame], mbmi_ext->ref_mv_stack[ref_frame], + av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame], + mbmi_ext->ref_mv_stack[ref_frame], #if CONFIG_EXT_INTER - mbmi_ext->compound_mode_context, + mbmi_ext->compound_mode_context, #endif // CONFIG_EXT_INTER -#endif // CONFIG_REF_MV - candidates, mi_row, mi_col, NULL, NULL, 
mbmi_ext->mode_context); + candidates, mi_row, mi_col, NULL, NULL, + mbmi_ext->mode_context); // Candidate refinement carried out at encoder and decoder av1_find_best_ref_mvs(cm->allow_high_precision_mv, candidates, @@ -6882,9 +5724,7 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x, av1_set_mv_search_range(&x->mv_limits, &ref_mv); -#if CONFIG_REF_MV av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx); -#endif // CONFIG_REF_MV // Work out the size of the first step in the mv step search. // 0 here is maximum length first step. 1 is AOMMAX >> 1 etc. @@ -6996,8 +5836,11 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x, x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list), - x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, pw, ph, - 1); + x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, +#if CONFIG_EXT_INTER + NULL, 0, 0, +#endif + pw, ph, 1); if (try_second) { const int minc = @@ -7021,7 +5864,11 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost, - &dis, &x->pred_sse[ref], NULL, pw, ph, 1); + &dis, &x->pred_sse[ref], NULL, +#if CONFIG_EXT_INTER + NULL, 0, 0, +#endif + pw, ph, 1); if (this_var < best_mv_var) best_mv = x->best_mv.as_mv; x->best_mv.as_mv = best_mv; } @@ -7034,8 +5881,11 @@ static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x, x, &ref_mv, cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list), - x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0, - 0); + x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, +#if CONFIG_EXT_INTER + NULL, 0, 0, +#endif + 0, 0, 0); } #if CONFIG_MOTION_VAR break; @@ -7077,131 +5927,287 @@ static INLINE void restore_dst_buf(MACROBLOCKD *xd, BUFFER_SET dst) { } #if CONFIG_EXT_INTER -#if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE -static void do_masked_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x, - const uint8_t *mask, int mask_stride, - BLOCK_SIZE bsize, int mi_row, int mi_col, - int_mv *tmp_mv, int *rate_mv, int ref_idx) { +static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x, + BLOCK_SIZE bsize, const MV *other_mv, + int mi_row, int mi_col, const int block, + int ref_idx, uint8_t *second_pred) { + const AV1_COMMON *const cm = &cpi->common; + const int pw = block_size_wide[bsize]; + const int ph = block_size_high[bsize]; MACROBLOCKD *xd = &x->e_mbd; - const AV1_COMMON *cm = &cpi->common; MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; - struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } }; - int bestsme = INT_MAX; - int step_param; - int sadpb = x->sadperbit16; - MV mvp_full; - int ref = mbmi->ref_frame[ref_idx]; - MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv; - - MvLimits tmp_mv_limits = x->mv_limits; - - const YV12_BUFFER_CONFIG *scaled_ref_frame = - av1_get_scaled_ref_frame(cpi, ref); - int i; + const int other_ref = mbmi->ref_frame[!ref_idx]; +#if CONFIG_DUAL_FILTER + InterpFilter interp_filter[2] = { + (ref_idx == 0) ? mbmi->interp_filter[2] : mbmi->interp_filter[0], + (ref_idx == 0) ? 
mbmi->interp_filter[3] : mbmi->interp_filter[1] + }; +#else + const InterpFilter interp_filter = mbmi->interp_filter; +#endif // CONFIG_DUAL_FILTER + struct scale_factors sf; +#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION + struct macroblockd_plane *const pd = &xd->plane[0]; + // ic and ir are the 4x4 coordiantes of the sub8x8 at index "block" + const int ic = block & 1; + const int ir = (block - ic) >> 1; + const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic; + const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir; +#if CONFIG_GLOBAL_MOTION + WarpedMotionParams *const wm = &xd->global_motion[other_ref]; + int is_global = is_global_mv_block(xd->mi[0], block, wm->wmtype); +#endif // CONFIG_GLOBAL_MOTION +#else + (void)block; +#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION - MV pred_mv[3]; - pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv; - pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv; - pred_mv[2] = x->pred_mv[ref]; + // This function should only ever be called for compound modes + assert(has_second_ref(mbmi)); -#if CONFIG_REF_MV - av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx); -#endif // CONFIG_REF_MV + struct buf_2d backup_yv12[MAX_MB_PLANE]; + const YV12_BUFFER_CONFIG *const scaled_ref_frame = + av1_get_scaled_ref_frame(cpi, other_ref); if (scaled_ref_frame) { + int i; // Swap out the reference frame for a version that's been scaled to // match the resolution of the current frame, allowing the existing // motion search code to be used without additional modifications. for (i = 0; i < MAX_MB_PLANE; i++) - backup_yv12[i] = xd->plane[i].pre[ref_idx]; - - av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL); + backup_yv12[i] = xd->plane[i].pre[!ref_idx]; + av1_setup_pre_planes(xd, !ref_idx, scaled_ref_frame, mi_row, mi_col, NULL); } - av1_set_mv_search_range(&x->mv_limits, &ref_mv); +// Since we have scaled the reference frames to match the size of the current +// frame we must use a unit scaling factor during mode selection. +#if CONFIG_HIGHBITDEPTH + av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width, + cm->height, cm->use_highbitdepth); +#else + av1_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width, + cm->height); +#endif // CONFIG_HIGHBITDEPTH - // Work out the size of the first step in the mv step search. - // 0 here is maximum length first step. 1 is MAX >> 1 etc. - if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) { - // Take wtd average of the step_params based on the last frame's - // max mv magnitude and that based on the best ref mvs of the current - // block for the given reference. - step_param = - (av1_init_search_range(x->max_mv_context[ref]) + cpi->mv_step_param) / - 2; + struct buf_2d ref_yv12; + + const int plane = 0; + ConvolveParams conv_params = get_conv_params(0, plane); +#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION + WarpTypesAllowed warp_types; +#if CONFIG_GLOBAL_MOTION + warp_types.global_warp_allowed = is_global; +#endif // CONFIG_GLOBAL_MOTION +#if CONFIG_WARPED_MOTION + warp_types.local_warp_allowed = mbmi->motion_mode == WARPED_CAUSAL; +#endif // CONFIG_WARPED_MOTION +#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION + + // Initialized here because of compiler problem in Visual Studio. + ref_yv12 = xd->plane[plane].pre[!ref_idx]; + +// Get the prediction block from the 'other' reference frame. 
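/* Flow (sketch): for a compound mode one MV is held fixed; this function
 * renders that fixed component once into second_pred, and
 * compound_single_motion_search then refines the other MV against the
 * source while each candidate is blended with second_pred (a plain
 * average, or through the wedge/segment mask when one is supplied).
 * The interinter wrapper further below amounts to:
 *   build_second_inter_pred(cpi, x, bsize, other_mv, ..., second_pred);
 *   compound_single_motion_search(cpi, x, bsize, this_mv, ...,
 *                                 second_pred, mask, mask_stride, ...);
 * i.e. exactly what compound_single_motion_search_interinter does. */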
+#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + av1_highbd_build_inter_predictor( + ref_yv12.buf, ref_yv12.stride, second_pred, pw, other_mv, &sf, pw, ph, + 0, interp_filter, +#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION + &warp_types, p_col, p_row, +#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION + plane, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd); } else { - step_param = cpi->mv_step_param; +#endif // CONFIG_HIGHBITDEPTH + av1_build_inter_predictor( + ref_yv12.buf, ref_yv12.stride, second_pred, pw, other_mv, &sf, pw, ph, + &conv_params, interp_filter, +#if CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION + &warp_types, p_col, p_row, plane, !ref_idx, +#endif // CONFIG_GLOBAL_MOTION || CONFIG_WARPED_MOTION + MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd); +#if CONFIG_HIGHBITDEPTH } +#endif // CONFIG_HIGHBITDEPTH - // TODO(debargha): is show_frame needed here? - if (cpi->sf.adaptive_motion_search && bsize < cm->sb_size && cm->show_frame) { - int boffset = - 2 * (b_width_log2_lookup[cm->sb_size] - - AOMMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize])); - step_param = AOMMAX(step_param, boffset); + if (scaled_ref_frame) { + // Restore the prediction frame pointers to their unscaled versions. + int i; + for (i = 0; i < MAX_MB_PLANE; i++) + xd->plane[i].pre[!ref_idx] = backup_yv12[i]; } +} - if (cpi->sf.adaptive_motion_search) { - int bwl = b_width_log2_lookup[bsize]; - int bhl = b_height_log2_lookup[bsize]; - int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4); +// Search for the best mv for one component of a compound, +// given that the other component is fixed. +static void compound_single_motion_search( + const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, MV *this_mv, + int mi_row, int mi_col, const uint8_t *second_pred, const uint8_t *mask, + int mask_stride, int *rate_mv, const int block, int ref_idx) { + const int pw = block_size_wide[bsize]; + const int ph = block_size_high[bsize]; + MACROBLOCKD *xd = &x->e_mbd; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + const int ref = mbmi->ref_frame[ref_idx]; + int_mv ref_mv = x->mbmi_ext->ref_mvs[ref][0]; + struct macroblockd_plane *const pd = &xd->plane[0]; - if (tlevel < 5) step_param += 2; + struct buf_2d backup_yv12[MAX_MB_PLANE]; + const YV12_BUFFER_CONFIG *const scaled_ref_frame = + av1_get_scaled_ref_frame(cpi, ref); - // prev_mv_sad is not setup for dynamically scaled frames. - if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) { - for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) { - if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) { - x->pred_mv[ref].row = 0; - x->pred_mv[ref].col = 0; - tmp_mv->as_int = INVALID_MV; + // Check that this is either an interinter or an interintra block + assert(has_second_ref(mbmi) || + (ref_idx == 0 && mbmi->ref_frame[1] == INTRA_FRAME)); - if (scaled_ref_frame) { - int j; - for (j = 0; j < MAX_MB_PLANE; ++j) - xd->plane[j].pre[ref_idx] = backup_yv12[j]; - } - return; - } - } - } + if (scaled_ref_frame) { + int i; + // Swap out the reference frame for a version that's been scaled to + // match the resolution of the current frame, allowing the existing + // motion search code to be used without additional modifications. 
+ for (i = 0; i < MAX_MB_PLANE; i++) + backup_yv12[i] = xd->plane[i].pre[ref_idx]; + av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL); } - mvp_full = pred_mv[x->mv_best_ref_index[ref]]; + struct buf_2d orig_yv12; + int bestsme = INT_MAX; + int sadpb = x->sadperbit16; + MV *const best_mv = &x->best_mv.as_mv; + int search_range = 3; + + MvLimits tmp_mv_limits = x->mv_limits; - mvp_full.col >>= 3; - mvp_full.row >>= 3; + // Initialized here because of compiler problem in Visual Studio. + if (ref_idx) { + orig_yv12 = pd->pre[0]; + pd->pre[0] = pd->pre[ref_idx]; + } - bestsme = av1_masked_full_pixel_diamond( - cpi, x, mask, mask_stride, &mvp_full, step_param, sadpb, - MAX_MVSEARCH_STEPS - 1 - step_param, 1, &cpi->fn_ptr[bsize], &ref_mv, - &tmp_mv->as_mv, ref_idx); + // Do compound motion search on the current reference frame. + av1_set_mv_search_range(&x->mv_limits, &ref_mv.as_mv); + + // Use the mv result from the single mode as mv predictor. + *best_mv = *this_mv; + + best_mv->col >>= 3; + best_mv->row >>= 3; + + av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx); + + // Small-range full-pixel motion search. + bestsme = av1_refining_search_8p_c(x, sadpb, search_range, + &cpi->fn_ptr[bsize], mask, mask_stride, + ref_idx, &ref_mv.as_mv, second_pred); + if (bestsme < INT_MAX) { + if (mask) + bestsme = + av1_get_mvpred_mask_var(x, best_mv, &ref_mv.as_mv, second_pred, mask, + mask_stride, ref_idx, &cpi->fn_ptr[bsize], 1); + else + bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv.as_mv, second_pred, + &cpi->fn_ptr[bsize], 1); + } x->mv_limits = tmp_mv_limits; if (bestsme < INT_MAX) { int dis; /* TODO: use dis in distortion calculation later. */ - av1_find_best_masked_sub_pixel_tree_up( - cpi, x, mask, mask_stride, mi_row, mi_col, &tmp_mv->as_mv, &ref_mv, - cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], - cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_iters_per_step, - x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], ref_idx, - cpi->sf.use_upsampled_references); - } - *rate_mv = av1_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost, - x->mvcost, MV_COST_WEIGHT); + unsigned int sse; + if (cpi->sf.use_upsampled_references) { + // Use up-sampled reference frames. + struct buf_2d backup_pred = pd->pre[0]; + const YV12_BUFFER_CONFIG *upsampled_ref = get_upsampled_ref(cpi, ref); + + // Set pred for Y plane + setup_pred_plane(&pd->pre[0], bsize, upsampled_ref->y_buffer, + upsampled_ref->y_crop_width, + upsampled_ref->y_crop_height, upsampled_ref->y_stride, + (mi_row << 3), (mi_col << 3), NULL, pd->subsampling_x, + pd->subsampling_y); + +// If bsize < BLOCK_8X8, adjust pred pointer for this block +#if !CONFIG_CB4X4 + if (bsize < BLOCK_8X8) + pd->pre[0].buf = + &pd->pre[0].buf[(av1_raster_block_offset(BLOCK_8X8, block, + pd->pre[0].stride)) + << 3]; +#endif // !CONFIG_CB4X4 + + bestsme = cpi->find_fractional_mv_step( + x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit, + &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL, + x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask, + mask_stride, ref_idx, pw, ph, 1); + + // Restore the reference frames. 
+ pd->pre[0] = backup_pred; + } else { + (void)block; + bestsme = cpi->find_fractional_mv_step( + x, &ref_mv.as_mv, cpi->common.allow_high_precision_mv, x->errorperbit, + &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL, + x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask, + mask_stride, ref_idx, pw, ph, 0); + } + } + + // Restore the pointer to the first (possibly scaled) prediction buffer. + if (ref_idx) pd->pre[0] = orig_yv12; + + if (bestsme < INT_MAX) *this_mv = *best_mv; - if (cpi->sf.adaptive_motion_search && cm->show_frame) - x->pred_mv[ref] = tmp_mv->as_mv; + *rate_mv = 0; if (scaled_ref_frame) { + // Restore the prediction frame pointers to their unscaled versions. + int i; for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[ref_idx] = backup_yv12[i]; } + + av1_set_mvcost(x, ref, ref_idx, mbmi->ref_mv_idx); + *rate_mv += av1_mv_bit_cost(this_mv, &ref_mv.as_mv, x->nmvjointcost, + x->mvcost, MV_COST_WEIGHT); } +// Wrapper for compound_single_motion_search, for the common case +// where the second prediction is also an inter mode. +static void compound_single_motion_search_interinter( + const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int_mv *frame_mv, + int mi_row, int mi_col, const uint8_t *mask, int mask_stride, int *rate_mv, + const int block, int ref_idx) { + MACROBLOCKD *xd = &x->e_mbd; + MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; + + // This function should only ever be called for compound modes + assert(has_second_ref(mbmi)); + +// Prediction buffer from second frame. +#if CONFIG_HIGHBITDEPTH + DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]); + uint8_t *second_pred; + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16); + else + second_pred = (uint8_t *)second_pred_alloc_16; +#else + DECLARE_ALIGNED(16, uint8_t, second_pred[MAX_SB_SQUARE]); +#endif // CONFIG_HIGHBITDEPTH + + MV *this_mv = &frame_mv[mbmi->ref_frame[ref_idx]].as_mv; + const MV *other_mv = &frame_mv[mbmi->ref_frame[!ref_idx]].as_mv; + + build_second_inter_pred(cpi, x, bsize, other_mv, mi_row, mi_col, block, + ref_idx, second_pred); + + compound_single_motion_search(cpi, x, bsize, this_mv, mi_row, mi_col, + second_pred, mask, mask_stride, rate_mv, block, + ref_idx); +} + +#if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE static void do_masked_motion_search_indexed( - const AV1_COMP *const cpi, MACROBLOCK *x, + const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv, const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE bsize, int mi_row, int mi_col, int_mv *tmp_mv, int *rate_mv, int which) { // NOTE: which values: 0 - 0 only, 1 - 1 only, 2 - both @@ -7213,23 +6219,22 @@ static void do_masked_motion_search_indexed( mask = av1_get_compound_type_mask(comp_data, sb_type); - if (which == 0 || which == 2) - do_masked_motion_search(cpi, x, mask, mask_stride, bsize, mi_row, mi_col, - &tmp_mv[0], &rate_mv[0], 0); - - if (which == 1 || which == 2) { -// get the negative mask -#if CONFIG_COMPOUND_SEGMENT - uint8_t inv_mask_buf[2 * MAX_SB_SQUARE]; - const int h = block_size_high[bsize]; - mask = av1_get_compound_type_mask_inverse( - comp_data, inv_mask_buf, h, mask_stride, mask_stride, sb_type); -#else - mask = av1_get_compound_type_mask_inverse(comp_data, sb_type); -#endif // CONFIG_COMPOUND_SEGMENT - do_masked_motion_search(cpi, x, mask, mask_stride, bsize, mi_row, mi_col, - &tmp_mv[1], &rate_mv[1], 1); - } + int_mv frame_mv[TOTAL_REFS_PER_FRAME]; + MV_REFERENCE_FRAME rf[2] = { mbmi->ref_frame[0], 
mbmi->ref_frame[1] }; + assert(bsize >= BLOCK_8X8 || CONFIG_CB4X4); + + frame_mv[rf[0]].as_int = cur_mv[0].as_int; + frame_mv[rf[1]].as_int = cur_mv[1].as_int; + if (which == 0 || which == 1) { + compound_single_motion_search_interinter(cpi, x, bsize, frame_mv, mi_row, + mi_col, mask, mask_stride, rate_mv, + 0, which); + } else if (which == 2) { + joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, mask, + mask_stride, rate_mv, 0); + } + tmp_mv[0].as_int = frame_mv[rf[0]].as_int; + tmp_mv[1].as_int = frame_mv[rf[1]].as_int; } #endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE #endif // CONFIG_EXT_INTER @@ -7275,7 +6280,7 @@ static int estimate_wedge_sign(const AV1_COMP *cpi, const MACROBLOCK *x, const int f_index = bsize - BLOCK_8X8; const int bw = block_size_wide[bsize]; const int bh = block_size_high[bsize]; - uint32_t esq[2][4], var; + uint32_t esq[2][4]; int64_t tl, br; #if CONFIG_HIGHBITDEPTH @@ -7285,23 +6290,22 @@ static int estimate_wedge_sign(const AV1_COMP *cpi, const MACROBLOCK *x, } #endif // CONFIG_HIGHBITDEPTH - var = cpi->fn_ptr[f_index].vf(src, src_stride, pred0, stride0, &esq[0][0]); - var = cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, pred0 + bw / 2, - stride0, &esq[0][1]); - var = cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride, src_stride, - pred0 + bh / 2 * stride0, stride0, &esq[0][2]); - var = cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride + bw / 2, src_stride, - pred0 + bh / 2 * stride0 + bw / 2, stride0, - &esq[0][3]); - var = cpi->fn_ptr[f_index].vf(src, src_stride, pred1, stride1, &esq[1][0]); - var = cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, pred1 + bw / 2, - stride1, &esq[1][1]); - var = cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride, src_stride, - pred1 + bh / 2 * stride1, stride0, &esq[1][2]); - var = cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride + bw / 2, src_stride, - pred1 + bh / 2 * stride1 + bw / 2, stride0, - &esq[1][3]); - (void)var; + cpi->fn_ptr[f_index].vf(src, src_stride, pred0, stride0, &esq[0][0]); + cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, pred0 + bw / 2, stride0, + &esq[0][1]); + cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride, src_stride, + pred0 + bh / 2 * stride0, stride0, &esq[0][2]); + cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride + bw / 2, src_stride, + pred0 + bh / 2 * stride0 + bw / 2, stride0, + &esq[0][3]); + cpi->fn_ptr[f_index].vf(src, src_stride, pred1, stride1, &esq[1][0]); + cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, pred1 + bw / 2, stride1, + &esq[1][1]); + cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride, src_stride, + pred1 + bh / 2 * stride1, stride0, &esq[1][2]); + cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride + bw / 2, src_stride, + pred1 + bh / 2 * stride1 + bw / 2, stride0, + &esq[1][3]); tl = (int64_t)(esq[0][0] + esq[0][1] + esq[0][2]) - (int64_t)(esq[1][0] + esq[1][1] + esq[1][2]); @@ -7353,16 +6357,6 @@ static InterpFilter predict_interp_filter( single_filter[NEARESTMV][refs[1]]) best_filter = single_filter[NEARESTMV][refs[0]]; break; - case NEAREST_NEARMV: - if (single_filter[NEARESTMV][refs[0]] == - single_filter[NEARMV][refs[1]]) - best_filter = single_filter[NEARESTMV][refs[0]]; - break; - case NEAR_NEARESTMV: - if (single_filter[NEARMV][refs[0]] == - single_filter[NEARESTMV][refs[1]]) - best_filter = single_filter[NEARMV][refs[0]]; - break; case NEAR_NEARMV: if (single_filter[NEARMV][refs[0]] == single_filter[NEARMV][refs[1]]) best_filter = single_filter[NEARMV][refs[0]]; @@ -7575,6 +6569,7 @@ static int64_t pick_interinter_wedge(const AV1_COMP 
*const cpi, int wedge_sign = 0; assert(is_interinter_compound_used(COMPOUND_WEDGE, bsize)); + assert(cpi->common.allow_masked_compound); if (cpi->sf.fast_wedge_sign_estimate) { wedge_sign = estimate_wedge_sign(cpi, x, bsize, p0, bw, p1, bw); @@ -7688,6 +6683,7 @@ static int64_t pick_interintra_wedge(const AV1_COMP *const cpi, int wedge_index = -1; assert(is_interintra_wedge_used(bsize)); + assert(cpi->common.allow_interintra_compound); rd = pick_wedge_fixed_sign(cpi, x, bsize, p0, p1, 0, &wedge_index); @@ -7715,15 +6711,13 @@ static int64_t pick_interinter_mask(const AV1_COMP *const cpi, MACROBLOCK *x, } } -static int interinter_compound_motion_search(const AV1_COMP *const cpi, - MACROBLOCK *x, - const BLOCK_SIZE bsize, - const int this_mode, int mi_row, - int mi_col) { +static int interinter_compound_motion_search( + const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv, + const BLOCK_SIZE bsize, const int this_mode, int mi_row, int mi_col) { MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; int_mv tmp_mv[2]; - int rate_mvs[2], tmp_rate_mv = 0; + int tmp_rate_mv = 0; const INTERINTER_COMPOUND_DATA compound_data = { #if CONFIG_WEDGE mbmi->wedge_index, @@ -7736,20 +6730,17 @@ static int interinter_compound_motion_search(const AV1_COMP *const cpi, mbmi->interinter_compound_type }; if (this_mode == NEW_NEWMV) { - do_masked_motion_search_indexed(cpi, x, &compound_data, bsize, mi_row, - mi_col, tmp_mv, rate_mvs, 2); - tmp_rate_mv = rate_mvs[0] + rate_mvs[1]; + do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize, + mi_row, mi_col, tmp_mv, &tmp_rate_mv, 2); mbmi->mv[0].as_int = tmp_mv[0].as_int; mbmi->mv[1].as_int = tmp_mv[1].as_int; } else if (this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV) { - do_masked_motion_search_indexed(cpi, x, &compound_data, bsize, mi_row, - mi_col, tmp_mv, rate_mvs, 0); - tmp_rate_mv = rate_mvs[0]; + do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize, + mi_row, mi_col, tmp_mv, &tmp_rate_mv, 0); mbmi->mv[0].as_int = tmp_mv[0].as_int; } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) { - do_masked_motion_search_indexed(cpi, x, &compound_data, bsize, mi_row, - mi_col, tmp_mv, rate_mvs, 1); - tmp_rate_mv = rate_mvs[1]; + do_masked_motion_search_indexed(cpi, x, cur_mv, &compound_data, bsize, + mi_row, mi_col, tmp_mv, &tmp_rate_mv, 1); mbmi->mv[1].as_int = tmp_mv[1].as_int; } return tmp_rate_mv; @@ -7760,6 +6751,7 @@ static int64_t build_and_cost_compound_type( const BLOCK_SIZE bsize, const int this_mode, int rs2, int rate_mv, BUFFER_SET *ctx, int *out_rate_mv, uint8_t **preds0, uint8_t **preds1, int *strides, int mi_row, int mi_col) { + const AV1_COMMON *const cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; int rate_sum; @@ -7775,9 +6767,9 @@ static int64_t build_and_cost_compound_type( if (have_newmv_in_inter_mode(this_mode) && use_masked_motion_search(compound_type)) { - *out_rate_mv = interinter_compound_motion_search(cpi, x, bsize, this_mode, - mi_row, mi_col); - av1_build_inter_predictors_sby(xd, mi_row, mi_col, ctx, bsize); + *out_rate_mv = interinter_compound_motion_search(cpi, x, cur_mv, bsize, + this_mode, mi_row, mi_col); + av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, ctx, bsize); model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum, &tmp_skip_txfm_sb, &tmp_skip_sse_sb); rd = RDCOST(x->rdmult, x->rddiv, rs2 + *out_rate_mv + rate_sum, dist_sum); @@ -7830,9 +6822,6 @@ typedef struct { // Pointer to 
array of motion vectors to use for each ref and their rates // Should point to first of 2 arrays in 2D array int *single_newmv_rate; - // Pointers costs of compound inter-intra and inter-inter predictions - int *compmode_interintra_cost; - int *compmode_interinter_cost; // Pointer to array of predicted rate-distortion // Should point to first of 2 arrays in 2D array int64_t (*modelled_rd)[TOTAL_REFS_PER_FRAME]; @@ -7872,14 +6861,12 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x, frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int; if (cpi->sf.comp_inter_joint_search_thresh <= bsize) { - joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, - rate_mv, 0); + joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, NULL, + 0, rate_mv, 0); } else { *rate_mv = 0; for (i = 0; i < 2; ++i) { -#if CONFIG_REF_MV av1_set_mvcost(x, refs[i], i, mbmi->ref_mv_idx); -#endif // CONFIG_REF_MV *rate_mv += av1_mv_bit_cost( &frame_mv[refs[i]].as_mv, &mbmi_ext->ref_mvs[refs[i]][0].as_mv, x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); @@ -7887,21 +6874,31 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x, } } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) { frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int; -#if CONFIG_REF_MV - av1_set_mvcost(x, refs[1], 1, mbmi->ref_mv_idx); -#endif // CONFIG_REF_MV - *rate_mv = av1_mv_bit_cost(&frame_mv[refs[1]].as_mv, - &mbmi_ext->ref_mvs[refs[1]][0].as_mv, - x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); + if (cpi->sf.comp_inter_joint_search_thresh <= bsize) { + frame_mv[refs[0]].as_int = + mode_mv[compound_ref0_mode(this_mode)][refs[0]].as_int; + compound_single_motion_search_interinter( + cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 1); + } else { + av1_set_mvcost(x, refs[1], 1, mbmi->ref_mv_idx); + *rate_mv = av1_mv_bit_cost(&frame_mv[refs[1]].as_mv, + &mbmi_ext->ref_mvs[refs[1]][0].as_mv, + x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); + } } else { assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV); frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int; -#if CONFIG_REF_MV - av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx); -#endif // CONFIG_REF_MV - *rate_mv = av1_mv_bit_cost(&frame_mv[refs[0]].as_mv, - &mbmi_ext->ref_mvs[refs[0]][0].as_mv, - x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); + if (cpi->sf.comp_inter_joint_search_thresh <= bsize) { + frame_mv[refs[1]].as_int = + mode_mv[compound_ref1_mode(this_mode)][refs[1]].as_int; + compound_single_motion_search_interinter( + cpi, x, bsize, frame_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 0); + } else { + av1_set_mvcost(x, refs[0], 0, mbmi->ref_mv_idx); + *rate_mv = av1_mv_bit_cost(&frame_mv[refs[0]].as_mv, + &mbmi_ext->ref_mvs[refs[0]][0].as_mv, + x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); + } } #else // Initialize mv using single prediction mode result. 
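In the paths above, the compound MVs are only re-searched when the block is at least cpi->sf.comp_inter_joint_search_thresh (jointly for NEW_NEWMV, one component at a time for the mixed NEW_* modes); otherwise the MVs carried over from the single-reference search are simply re-costed. Either way a refined MV is costed with av1_mv_bit_cost() against the best reference MV and folded into the usual rate-distortion comparison. A minimal sketch of that bookkeeping, assuming the rdopt.c context (sketch_rd_for_mv is a hypothetical helper; RDCOST and MV_COST_WEIGHT come from the encoder headers):

// Sketch: cost one candidate MV and fold it into an RD decision.
static int64_t sketch_rd_for_mv(MACROBLOCK *x, const MV *mv,
                                const MV *ref_mv, int rate_other,
                                int64_t dist) {
  // Rate of signalling mv relative to ref_mv, weighted by MV_COST_WEIGHT
  // before it joins the other rate terms.
  const int rate_mv = av1_mv_bit_cost(mv, ref_mv, x->nmvjointcost,
                                      x->mvcost, MV_COST_WEIGHT);
  // Lower is better; rdmult/rddiv set the lambda of the trade-off.
  return RDCOST(x->rdmult, x->rddiv, rate_other + rate_mv, dist);
}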
@@ -7913,9 +6910,7 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x, } else { *rate_mv = 0; for (i = 0; i < 2; ++i) { -#if CONFIG_REF_MV av1_set_mvcost(x, refs[i], i, mbmi->ref_mv_idx); -#endif // CONFIG_REF_MV *rate_mv += av1_mv_bit_cost(&frame_mv[refs[i]].as_mv, &mbmi_ext->ref_mvs[refs[i]][0].as_mv, x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); @@ -7986,7 +6981,7 @@ int64_t interpolation_filter_search( set_default_interp_filters(mbmi, assign_filter); *switchable_rate = av1_get_switchable_rate(cpi, xd); - av1_build_inter_predictors_sb(xd, mi_row, mi_col, orig_dst, bsize); + av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize); model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate, &tmp_dist, skip_txfm_sb, skip_sse_sb); *rd = RDCOST(x->rdmult, x->rddiv, *switchable_rate + tmp_rate, tmp_dist); @@ -8022,7 +7017,7 @@ int64_t interpolation_filter_search( mbmi->interp_filter = (InterpFilter)i; #endif // CONFIG_DUAL_FILTER tmp_rs = av1_get_switchable_rate(cpi, xd); - av1_build_inter_predictors_sb(xd, mi_row, mi_col, orig_dst, bsize); + av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize); model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate, &tmp_dist, &tmp_skip_sb, &tmp_skip_sse); tmp_rd = RDCOST(x->rdmult, x->rddiv, tmp_rs + tmp_rate, tmp_dist); @@ -8077,6 +7072,7 @@ static int64_t motion_mode_rd( int mi_col, HandleInterModeArgs *const args, const int64_t ref_best_rd, const int *refs, int rate_mv, #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION + int_mv *const single_newmv, #if CONFIG_EXT_INTER int rate2_bmc_nocoeff, MB_MODE_INFO *best_bmc_mbmi, #if CONFIG_MOTION_VAR @@ -8183,10 +7179,10 @@ static int64_t motion_mode_rd( if (!has_subpel_mv_component(xd->mi[0], xd, 1)) mbmi->interp_filter[1] = EIGHTTAP_REGULAR; #endif // CONFIG_DUAL_FILTER - av1_build_inter_predictors_sb(xd, mi_row, mi_col, orig_dst, bsize); + av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize); #if CONFIG_EXT_INTER } else { - av1_build_inter_predictors_sb(xd, mi_row, mi_col, orig_dst, bsize); + av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize); #endif // CONFIG_EXT_INTER } av1_build_obmc_inter_prediction( @@ -8214,10 +7210,55 @@ static int64_t motion_mode_rd( : cm->interp_filter; #endif // CONFIG_DUAL_FILTER - if (find_projection(mbmi->num_proj_ref[0], pts, pts_inref, bsize, - mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col, - &mbmi->wm_params[0], mi_row, mi_col) == 0) { - av1_build_inter_predictors_sb(xd, mi_row, mi_col, NULL, bsize); + if (!find_projection(mbmi->num_proj_ref[0], pts, pts_inref, bsize, + mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col, + &mbmi->wm_params[0], mi_row, mi_col)) { + // Refine MV for NEWMV mode + if (!is_comp_pred && have_newmv_in_inter_mode(this_mode)) { + int tmp_rate_mv = 0; + const int_mv mv0 = mbmi->mv[0]; + WarpedMotionParams wm_params0 = mbmi->wm_params[0]; + + // Refine MV in a small range. + av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts, pts_inref); + + // Keep the refined MV and WM parameters. 
+ if (mv0.as_int != mbmi->mv[0].as_int) { + const int ref = refs[0]; + const MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv; + + tmp_rate_mv = + av1_mv_bit_cost(&mbmi->mv[0].as_mv, &ref_mv, x->nmvjointcost, + x->mvcost, MV_COST_WEIGHT); + + if (cpi->sf.adaptive_motion_search) + x->pred_mv[ref] = mbmi->mv[0].as_mv; + + single_newmv[ref] = mbmi->mv[0]; + + if (discount_newmv_test(cpi, this_mode, mbmi->mv[0], mode_mv, + refs[0])) { + tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1); + } +#if CONFIG_EXT_INTER + tmp_rate2 = rate2_bmc_nocoeff - rate_mv_bmc + tmp_rate_mv; +#else + tmp_rate2 = rate2_nocoeff - rate_mv + tmp_rate_mv; +#endif // CONFIG_EXT_INTER +#if CONFIG_DUAL_FILTER + if (!has_subpel_mv_component(xd->mi[0], xd, 0)) + mbmi->interp_filter[0] = EIGHTTAP_REGULAR; + if (!has_subpel_mv_component(xd->mi[0], xd, 1)) + mbmi->interp_filter[1] = EIGHTTAP_REGULAR; +#endif // CONFIG_DUAL_FILTER + } else { + // Restore the old MV and WM parameters. + mbmi->mv[0] = mv0; + mbmi->wm_params[0] = wm_params0; + } + } + + av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize); model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate, &tmp_dist, skip_txfm_sb, skip_sse_sb); } else { @@ -8446,16 +7487,16 @@ static int64_t handle_inter_mode( int rate_mv = 0; #if CONFIG_EXT_INTER int pred_exists = 1; +#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT const int bw = block_size_wide[bsize]; +#endif // ONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT int_mv single_newmv[TOTAL_REFS_PER_FRAME]; #if CONFIG_INTERINTRA const unsigned int *const interintra_mode_cost = cpi->interintra_mode_cost[size_group_lookup[bsize]]; #endif // CONFIG_INTERINTRA const int is_comp_interintra_pred = (mbmi->ref_frame[1] == INTRA_FRAME); -#if CONFIG_REF_MV uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame); -#endif // CONFIG_REF_MV #else int_mv *const single_newmv = args->single_newmv; #endif // CONFIG_EXT_INTER @@ -8484,10 +7525,19 @@ static int64_t handle_inter_mode( int16_t mode_ctx; #if CONFIG_EXT_INTER - *args->compmode_interintra_cost = 0; +#if CONFIG_INTERINTRA + int compmode_interintra_cost = 0; mbmi->use_wedge_interintra = 0; - *args->compmode_interinter_cost = 0; +#endif +#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT + int compmode_interinter_cost = 0; mbmi->interinter_compound_type = COMPOUND_AVERAGE; +#endif + +#if CONFIG_INTERINTRA + if (!cm->allow_interintra_compound && is_comp_interintra_pred) + return INT64_MAX; +#endif // CONFIG_INTERINTRA // is_comp_interintra_pred implies !is_comp_pred assert(!is_comp_interintra_pred || (!is_comp_pred)); @@ -8495,7 +7545,6 @@ static int64_t handle_inter_mode( assert(!is_comp_interintra_pred || is_interintra_allowed(mbmi)); #endif // CONFIG_EXT_INTER -#if CONFIG_REF_MV #if CONFIG_EXT_INTER if (is_comp_pred) mode_ctx = mbmi_ext->compound_mode_context[refs[0]]; @@ -8503,9 +7552,6 @@ static int64_t handle_inter_mode( #endif // CONFIG_EXT_INTER mode_ctx = av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame, bsize, -1); -#else // CONFIG_REF_MV - mode_ctx = mbmi_ext->mode_context[refs[0]]; -#endif // CONFIG_REF_MV #if CONFIG_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) @@ -8545,7 +7591,6 @@ static int64_t handle_inter_mode( mbmi->mv[i].as_int = cur_mv[i].as_int; } -#if CONFIG_REF_MV #if CONFIG_EXT_INTER if (this_mode == NEAREST_NEARESTMV) #else @@ -8569,7 +7614,7 @@ static int64_t handle_inter_mode( #if CONFIG_EXT_INTER if (mbmi_ext->ref_mv_count[ref_frame_type] > 0) { - if (this_mode == NEAREST_NEWMV || this_mode == 
NEAREST_NEARMV) { + if (this_mode == NEAREST_NEWMV) { cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][0].this_mv; lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv); @@ -8578,7 +7623,7 @@ static int64_t handle_inter_mode( mbmi->mv[0].as_int = cur_mv[0].as_int; } - if (this_mode == NEW_NEARESTMV || this_mode == NEAR_NEARESTMV) { + if (this_mode == NEW_NEARESTMV) { cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][0].comp_mv; lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv); @@ -8590,8 +7635,7 @@ static int64_t handle_inter_mode( if (mbmi_ext->ref_mv_count[ref_frame_type] > 1) { int ref_mv_idx = mbmi->ref_mv_idx + 1; - if (this_mode == NEAR_NEWMV || this_mode == NEAR_NEARESTMV || - this_mode == NEAR_NEARMV) { + if (this_mode == NEAR_NEWMV || this_mode == NEAR_NEARMV) { cur_mv[0] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].this_mv; lower_mv_precision(&cur_mv[0].as_mv, cm->allow_high_precision_mv); @@ -8600,8 +7644,7 @@ static int64_t handle_inter_mode( mbmi->mv[0].as_int = cur_mv[0].as_int; } - if (this_mode == NEW_NEARMV || this_mode == NEAREST_NEARMV || - this_mode == NEAR_NEARMV) { + if (this_mode == NEW_NEARMV || this_mode == NEAR_NEARMV) { cur_mv[1] = mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx].comp_mv; lower_mv_precision(&cur_mv[1].as_mv, cm->allow_high_precision_mv); @@ -8626,7 +7669,6 @@ static int64_t handle_inter_mode( } } #endif // CONFIG_EXT_INTER -#endif // CONFIG_REF_MV // do first prediction into the destination buffer. Do the next // prediction into a temporary buffer. Then keep track of which one @@ -8659,7 +7701,7 @@ static int64_t handle_inter_mode( #else rd_stats->rate += AOMMIN(cost_mv_ref(cpi, this_mode, mode_ctx), cost_mv_ref(cpi, NEARESTMV, mode_ctx)); -#endif // CONFIG_REF_MV && CONFIG_EXT_INTER +#endif // CONFIG_EXT_INTER } else { rd_stats->rate += cost_mv_ref(cpi, this_mode, mode_ctx); } @@ -8688,6 +7730,7 @@ static int64_t handle_inter_mode( #endif // CONFIG_MOTION_VAR #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION +#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT if (is_comp_pred) { int rate_sum, rs2; int64_t dist_sum; @@ -8705,6 +7748,9 @@ static int64_t handle_inter_mode( int strides[1] = { bw }; int tmp_rate_mv; int masked_compound_used = is_any_masked_compound_used(bsize); +#if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE + masked_compound_used = masked_compound_used && cm->allow_masked_compound; +#endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE COMPOUND_TYPE cur_type; best_mv[0].as_int = cur_mv[0].as_int; @@ -8714,8 +7760,6 @@ static int64_t handle_inter_mode( uint8_t tmp_mask_buf[2 * MAX_SB_SQUARE]; best_compound_data.seg_mask = tmp_mask_buf; #endif // CONFIG_COMPOUND_SEGMENT - av1_cost_tokens(compound_type_cost, cm->fc->compound_type_prob[bsize], - av1_compound_type_tree); if (masked_compound_used) { av1_cost_tokens(compound_type_cost, cm->fc->compound_type_prob[bsize], @@ -8728,6 +7772,7 @@ static int64_t handle_inter_mode( } for (cur_type = COMPOUND_AVERAGE; cur_type < COMPOUND_TYPES; cur_type++) { + if (cur_type != COMPOUND_AVERAGE && !masked_compound_used) break; if (!is_interinter_compound_used(cur_type, bsize)) break; tmp_rate_mv = rate_mv; best_rd_cur = INT64_MAX; @@ -8740,7 +7785,8 @@ static int64_t handle_inter_mode( switch (cur_type) { case COMPOUND_AVERAGE: - av1_build_inter_predictors_sby(xd, mi_row, mi_col, &orig_dst, bsize); + av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, &orig_dst, + bsize); av1_subtract_plane(x, bsize, 0); rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum, 
&tmp_skip_txfm_sb, &tmp_skip_sse_sb, @@ -8830,13 +7876,14 @@ static int64_t handle_inter_mode( pred_exists = 0; - *args->compmode_interinter_cost = + compmode_interinter_cost = av1_cost_literal(get_interinter_compound_type_bits( bsize, mbmi->interinter_compound_type)) + (masked_compound_used ? compound_type_cost[mbmi->interinter_compound_type] : 0); } +#endif // CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT #if CONFIG_INTERINTRA if (is_comp_interintra_pred) { @@ -8863,7 +7910,7 @@ static int64_t handle_inter_mode( xd->plane[j].dst.buf = tmp_buf + j * MAX_SB_SQUARE; xd->plane[j].dst.stride = bw; } - av1_build_inter_predictors_sby(xd, mi_row, mi_col, &orig_dst, bsize); + av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, &orig_dst, bsize); restore_dst_buf(xd, orig_dst); mbmi->ref_frame[1] = INTRA_FRAME; mbmi->use_wedge_interintra = 0; @@ -8876,7 +7923,8 @@ static int64_t handle_inter_mode( av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw); model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum, &tmp_skip_txfm_sb, &tmp_skip_sse_sb); - rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate_mv + rate_sum, dist_sum); + rd = + RDCOST(x->rdmult, x->rddiv, tmp_rate_mv + rate_sum + rmode, dist_sum); if (rd < best_interintra_rd) { best_interintra_rd = rd; best_interintra_mode = mbmi->interintra_mode; @@ -8907,7 +7955,7 @@ static int64_t handle_inter_mode( if (rd != INT64_MAX) rd = RDCOST(x->rdmult, x->rddiv, rmode + rate_mv + rwedge + rate_sum, dist_sum); - best_interintra_rd_nowedge = rd; + best_interintra_rd_nowedge = best_interintra_rd; // Disable wedge search if source variance is small if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh) { @@ -8926,17 +7974,18 @@ static int64_t handle_inter_mode( // get negative of mask const uint8_t *mask = av1_get_contiguous_soft_mask( mbmi->interintra_wedge_index, 1, bsize); - do_masked_motion_search(cpi, x, mask, bw, bsize, mi_row, mi_col, - &tmp_mv, &tmp_rate_mv, 0); + tmp_mv.as_int = x->mbmi_ext->ref_mvs[refs[0]][0].as_int; + compound_single_motion_search(cpi, x, bsize, &tmp_mv.as_mv, mi_row, + mi_col, intrapred, mask, bw, + &tmp_rate_mv, 0, 0); mbmi->mv[0].as_int = tmp_mv.as_int; - av1_build_inter_predictors_sby(xd, mi_row, mi_col, &orig_dst, bsize); + av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, &orig_dst, + bsize); model_rd_for_sb(cpi, bsize, x, xd, 0, 0, &rate_sum, &dist_sum, &tmp_skip_txfm_sb, &tmp_skip_sse_sb); rd = RDCOST(x->rdmult, x->rddiv, rmode + tmp_rate_mv + rwedge + rate_sum, dist_sum); - if (rd < best_interintra_rd_wedge) { - best_interintra_rd_wedge = rd; - } else { + if (rd >= best_interintra_rd_wedge) { tmp_mv.as_int = cur_mv[0].as_int; tmp_rate_mv = rate_mv; } @@ -8956,37 +8005,33 @@ static int64_t handle_inter_mode( best_interintra_rd_wedge = rd; if (best_interintra_rd_wedge < best_interintra_rd_nowedge) { mbmi->use_wedge_interintra = 1; - best_interintra_rd = best_interintra_rd_wedge; mbmi->mv[0].as_int = tmp_mv.as_int; rd_stats->rate += tmp_rate_mv - rate_mv; rate_mv = tmp_rate_mv; } else { mbmi->use_wedge_interintra = 0; - best_interintra_rd = best_interintra_rd_nowedge; mbmi->mv[0].as_int = cur_mv[0].as_int; } } else { mbmi->use_wedge_interintra = 0; - best_interintra_rd = best_interintra_rd_nowedge; } } #endif // CONFIG_WEDGE pred_exists = 0; - *args->compmode_interintra_cost = - av1_cost_bit(cm->fc->interintra_prob[size_group_lookup[bsize]], 1); - *args->compmode_interintra_cost += + compmode_interintra_cost = + av1_cost_bit(cm->fc->interintra_prob[size_group_lookup[bsize]], 1) + 
interintra_mode_cost[mbmi->interintra_mode]; if (is_interintra_wedge_used(bsize)) { - *args->compmode_interintra_cost += av1_cost_bit( + compmode_interintra_cost += av1_cost_bit( cm->fc->wedge_interintra_prob[bsize], mbmi->use_wedge_interintra); if (mbmi->use_wedge_interintra) { - *args->compmode_interintra_cost += + compmode_interintra_cost += av1_cost_literal(get_interintra_wedge_bits(bsize)); } } } else if (is_interintra_allowed(mbmi)) { - *args->compmode_interintra_cost = + compmode_interintra_cost = av1_cost_bit(cm->fc->interintra_prob[size_group_lookup[bsize]], 0); } #endif // CONFIG_INTERINTRA @@ -8994,7 +8039,7 @@ static int64_t handle_inter_mode( if (pred_exists == 0) { int tmp_rate; int64_t tmp_dist; - av1_build_inter_predictors_sb(xd, mi_row, mi_col, &orig_dst, bsize); + av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, &orig_dst, bsize); model_rd_for_sb(cpi, bsize, x, xd, 0, MAX_MB_PLANE - 1, &tmp_rate, &tmp_dist, &skip_txfm_sb, &skip_sse_sb); rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist); @@ -9034,10 +8079,23 @@ static int64_t handle_inter_mode( } } +#if CONFIG_EXT_INTER +#if CONFIG_INTERINTRA + rd_stats->rate += compmode_interintra_cost; +#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION + rate2_bmc_nocoeff += compmode_interintra_cost; +#endif +#endif +#if CONFIG_WEDGE || CONFIG_COMPOUND_SEGMENT + rd_stats->rate += compmode_interinter_cost; +#endif +#endif + ret_val = motion_mode_rd(cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv, disable_skip, mode_mv, mi_row, mi_col, args, ref_best_rd, refs, rate_mv, #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION + single_newmv, #if CONFIG_EXT_INTER rate2_bmc_nocoeff, &best_bmc_mbmi, #if CONFIG_MOTION_VAR @@ -9060,34 +8118,36 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *const xd = &x->e_mbd; const TileInfo *tile = &xd->tile; +#if CONFIG_EC_ADAPT + FRAME_CONTEXT *const ec_ctx = xd->tile_ctx; +#else + FRAME_CONTEXT *const ec_ctx = cm->fc; +#endif // CONFIG_EC_ADAPT MODE_INFO *const mi = xd->mi[0]; const int mi_row = -xd->mb_to_top_edge / (8 * MI_SIZE); const int mi_col = -xd->mb_to_left_edge / (8 * MI_SIZE); const int w = block_size_wide[bsize]; const int h = block_size_high[bsize]; const int sb_row = mi_row / MAX_MIB_SIZE; + const int sb_col = mi_col / MAX_MIB_SIZE; - int_mv dv_ref; - av1_find_ref_dv(&dv_ref, mi_row, mi_col); - - const MvLimits tmp_mv_limits = x->mv_limits; + MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; + MV_REFERENCE_FRAME ref_frame = INTRA_FRAME; + int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame]; + av1_find_mv_refs(cm, xd, mi, ref_frame, &mbmi_ext->ref_mv_count[ref_frame], + mbmi_ext->ref_mv_stack[ref_frame], +#if CONFIG_EXT_INTER + mbmi_ext->compound_mode_context, +#endif // CONFIG_EXT_INTER + candidates, mi_row, mi_col, NULL, NULL, + mbmi_ext->mode_context); - // TODO(aconverse@google.com): Handle same row DV. 
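/*
 * Illustrative sketch (not part of the patch) of the intra-block-copy
 * search-range setup that the rewritten rd_pick_intrabc_mode_sb() below
 * applies per direction: the displacement vector may only point into
 * already-reconstructed pixels, so the limits are clamped either to the
 * superblock rows above the current block or to the columns on its left.
 * The types and the helper are reduced, hypothetical stand-ins; the
 * arithmetic mirrors the IBC_MOTION_ABOVE / IBC_MOTION_LEFT cases in the
 * diff, with MI_SIZE and MAX_MIB_SIZE passed in rather than assumed.
 */
typedef struct { int mi_row_start, mi_row_end, mi_col_start, mi_col_end; } SketchTile;
typedef struct { int col_min, col_max, row_min, row_max; } SketchMvLimits;

static int sketch_imin(int a, int b) { return a < b ? a : b; }

static void sketch_ibc_limits(int dir_above, const SketchTile *tile,
                              int mi_row, int mi_col, int w, int h,
                              int sb_row, int sb_col, int mi_size,
                              int max_mib_size, SketchMvLimits *lim) {
  lim->col_min = (tile->mi_col_start - mi_col) * mi_size;
  lim->row_min = (tile->mi_row_start - mi_row) * mi_size;
  if (dir_above) {
    /* Full tile width, but only superblock rows strictly above this one. */
    lim->col_max = (tile->mi_col_end - mi_col) * mi_size - w;
    lim->row_max = (sb_row * max_mib_size - mi_row) * mi_size - h;
  } else {
    /* Columns strictly to the left, down to the bottom of the coded area. */
    lim->col_max = (sb_col * max_mib_size - mi_col) * mi_size - w;
    lim->row_max =
        (sketch_imin((sb_row + 1) * max_mib_size, tile->mi_row_end) - mi_row) *
            mi_size - h;
  }
}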
- x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE; - x->mv_limits.col_max = (tile->mi_col_end - mi_col) * MI_SIZE - w; - x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE; - x->mv_limits.row_max = (sb_row * MAX_MIB_SIZE - mi_row) * MI_SIZE - h; - assert(x->mv_limits.col_min >= tmp_mv_limits.col_min); - assert(x->mv_limits.col_max <= tmp_mv_limits.col_max); - assert(x->mv_limits.row_min >= tmp_mv_limits.row_min); - assert(x->mv_limits.row_max <= tmp_mv_limits.row_max); - av1_set_mv_search_range(&x->mv_limits, &dv_ref.as_mv); + int_mv nearestmv, nearmv; + av1_find_best_ref_mvs(0, candidates, &nearestmv, &nearmv); - if (x->mv_limits.col_max < x->mv_limits.col_min || - x->mv_limits.row_max < x->mv_limits.row_min) { - x->mv_limits = tmp_mv_limits; - return INT64_MAX; - } + int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv; + if (dv_ref.as_int == 0) av1_find_ref_dv(&dv_ref, mi_row, mi_col); + mbmi_ext->ref_mvs[INTRA_FRAME][0] = dv_ref; struct buf_2d yv12_mb[MAX_MB_PLANE]; av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, mi_row, mi_col, NULL, NULL); @@ -9095,86 +8155,140 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, xd->plane[i].pre[0] = yv12_mb[i]; } - int step_param = cpi->mv_step_param; - MV mvp_full = dv_ref.as_mv; - mvp_full.col >>= 3; - mvp_full.row >>= 3; - int sadpb = x->sadperbit16; - int cost_list[5]; - int bestsme = av1_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, - sadpb, cond_cost_list(cpi, cost_list), - &dv_ref.as_mv, INT_MAX, 1); + enum IntrabcMotionDirection { + IBC_MOTION_ABOVE, + IBC_MOTION_LEFT, + IBC_MOTION_DIRECTIONS + }; - x->mv_limits = tmp_mv_limits; - if (bestsme == INT_MAX) return INT64_MAX; - mvp_full = x->best_mv.as_mv; - MV dv = {.row = mvp_full.row * 8, .col = mvp_full.col * 8 }; - if (mv_check_bounds(&x->mv_limits, &dv)) return INT64_MAX; - if (!is_dv_valid(dv, tile, mi_row, mi_col, bsize)) return INT64_MAX; MB_MODE_INFO *mbmi = &mi->mbmi; MB_MODE_INFO best_mbmi = *mbmi; RD_STATS best_rdcost = *rd_cost; int best_skip = x->skip; + + for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE; + dir < IBC_MOTION_DIRECTIONS; ++dir) { + const MvLimits tmp_mv_limits = x->mv_limits; + switch (dir) { + case IBC_MOTION_ABOVE: + x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE; + x->mv_limits.col_max = (tile->mi_col_end - mi_col) * MI_SIZE - w; + x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE; + x->mv_limits.row_max = (sb_row * MAX_MIB_SIZE - mi_row) * MI_SIZE - h; + break; + case IBC_MOTION_LEFT: + x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE; + x->mv_limits.col_max = (sb_col * MAX_MIB_SIZE - mi_col) * MI_SIZE - w; + // TODO(aconverse@google.com): Minimize the overlap between above and + // left areas. 
+ x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE; + int bottom_coded_mi_edge = + AOMMIN((sb_row + 1) * MAX_MIB_SIZE, tile->mi_row_end); + x->mv_limits.row_max = (bottom_coded_mi_edge - mi_row) * MI_SIZE - h; + break; + default: assert(0); + } + assert(x->mv_limits.col_min >= tmp_mv_limits.col_min); + assert(x->mv_limits.col_max <= tmp_mv_limits.col_max); + assert(x->mv_limits.row_min >= tmp_mv_limits.row_min); + assert(x->mv_limits.row_max <= tmp_mv_limits.row_max); + av1_set_mv_search_range(&x->mv_limits, &dv_ref.as_mv); + + if (x->mv_limits.col_max < x->mv_limits.col_min || + x->mv_limits.row_max < x->mv_limits.row_min) { + x->mv_limits = tmp_mv_limits; + continue; + } + + int step_param = cpi->mv_step_param; + MV mvp_full = dv_ref.as_mv; + mvp_full.col >>= 3; + mvp_full.row >>= 3; + int sadpb = x->sadperbit16; + int cost_list[5]; + int bestsme = av1_full_pixel_search(cpi, x, bsize, &mvp_full, step_param, + sadpb, cond_cost_list(cpi, cost_list), + &dv_ref.as_mv, INT_MAX, 1); + + x->mv_limits = tmp_mv_limits; + if (bestsme == INT_MAX) continue; + mvp_full = x->best_mv.as_mv; + MV dv = {.row = mvp_full.row * 8, .col = mvp_full.col * 8 }; + if (mv_check_bounds(&x->mv_limits, &dv)) continue; + if (!is_dv_valid(dv, tile, mi_row, mi_col, bsize)) continue; + #if CONFIG_PALETTE - memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info)); + memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info)); #endif - mbmi->use_intrabc = 1; - mbmi->mode = DC_PRED; - mbmi->uv_mode = DC_PRED; - mbmi->mv[0].as_mv = dv; + mbmi->use_intrabc = 1; + mbmi->mode = DC_PRED; + mbmi->uv_mode = DC_PRED; + mbmi->mv[0].as_mv = dv; #if CONFIG_DUAL_FILTER - for (int idx = 0; idx < 4; ++idx) mbmi->interp_filter[idx] = BILINEAR; + for (int idx = 0; idx < 4; ++idx) mbmi->interp_filter[idx] = BILINEAR; #else - mbmi->interp_filter = BILINEAR; + mbmi->interp_filter = BILINEAR; #endif - mbmi->skip = 0; - x->skip = 0; - av1_build_inter_predictors_sb(xd, mi_row, mi_col, NULL, bsize); - - int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, x->nmvjointcost, x->mvcost, - MV_COST_WEIGHT); - const PREDICTION_MODE A = av1_above_block_mode(mi, xd->above_mi, 0); - const PREDICTION_MODE L = av1_left_block_mode(mi, xd->left_mi, 0); - const int rate_mode = - cpi->y_mode_costs[A][L][DC_PRED] + av1_cost_bit(INTRABC_PROB, 1); - - RD_STATS rd_stats, rd_stats_uv; - av1_subtract_plane(x, bsize, 0); - super_block_yrd(cpi, x, &rd_stats, bsize, INT64_MAX); - super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX); - av1_merge_rd_stats(&rd_stats, &rd_stats_uv); + mbmi->skip = 0; + x->skip = 0; + av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize); + + int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, x->nmvjointcost, + x->mvcost, MV_COST_WEIGHT); + const PREDICTION_MODE A = av1_above_block_mode(mi, xd->above_mi, 0); + const PREDICTION_MODE L = av1_left_block_mode(mi, xd->left_mi, 0); + const int rate_mode = cpi->y_mode_costs[A][L][DC_PRED] + + av1_cost_bit(ec_ctx->intrabc_prob, 1); + + RD_STATS rd_stats, rd_stats_uv; + av1_subtract_plane(x, bsize, 0); + super_block_yrd(cpi, x, &rd_stats, bsize, INT64_MAX); + super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX); + av1_merge_rd_stats(&rd_stats, &rd_stats_uv); #if CONFIG_RD_DEBUG - mbmi->rd_stats = rd_stats; + mbmi->rd_stats = rd_stats; #endif - const aom_prob skip_prob = av1_get_skip_prob(cm, xd); - - RD_STATS rdc_noskip; - av1_init_rd_stats(&rdc_noskip); - rdc_noskip.rate = - rate_mode + rate_mv + rd_stats.rate + av1_cost_bit(skip_prob, 0); - 
rdc_noskip.dist = rd_stats.dist; - rdc_noskip.rdcost = - RDCOST(x->rdmult, x->rddiv, rdc_noskip.rate, rdc_noskip.dist); - if (rdc_noskip.rdcost < best_rd) { - best_rd = rdc_noskip.rdcost; - best_mbmi = *mbmi; - best_skip = x->skip; - best_rdcost = rdc_noskip; - } +#if CONFIG_VAR_TX + // TODO(aconverse@google.com): Evaluate allowing VAR TX on intrabc blocks + const int width = block_size_wide[bsize] >> tx_size_wide_log2[0]; + const int height = block_size_high[bsize] >> tx_size_high_log2[0]; + int idx, idy; + for (idy = 0; idy < height; ++idy) + for (idx = 0; idx < width; ++idx) + mbmi->inter_tx_size[idy >> 1][idx >> 1] = mbmi->tx_size; + mbmi->min_tx_size = get_min_tx_size(mbmi->tx_size); +#endif // CONFIG_VAR_TX - x->skip = 1; - mbmi->skip = 1; - RD_STATS rdc_skip; - av1_init_rd_stats(&rdc_skip); - rdc_skip.rate = rate_mode + rate_mv + av1_cost_bit(skip_prob, 1); - rdc_skip.dist = rd_stats.sse; - rdc_skip.rdcost = RDCOST(x->rdmult, x->rddiv, rdc_skip.rate, rdc_skip.dist); - if (rdc_skip.rdcost < best_rd) { - best_rd = rdc_skip.rdcost; - best_mbmi = *mbmi; - best_skip = x->skip; - best_rdcost = rdc_skip; + const aom_prob skip_prob = av1_get_skip_prob(cm, xd); + + RD_STATS rdc_noskip; + av1_init_rd_stats(&rdc_noskip); + rdc_noskip.rate = + rate_mode + rate_mv + rd_stats.rate + av1_cost_bit(skip_prob, 0); + rdc_noskip.dist = rd_stats.dist; + rdc_noskip.rdcost = + RDCOST(x->rdmult, x->rddiv, rdc_noskip.rate, rdc_noskip.dist); + if (rdc_noskip.rdcost < best_rd) { + best_rd = rdc_noskip.rdcost; + best_mbmi = *mbmi; + best_skip = x->skip; + best_rdcost = rdc_noskip; + } + + x->skip = 1; + mbmi->skip = 1; + RD_STATS rdc_skip; + av1_init_rd_stats(&rdc_skip); + rdc_skip.rate = rate_mode + rate_mv + av1_cost_bit(skip_prob, 1); + rdc_skip.dist = rd_stats.sse; + rdc_skip.rdcost = RDCOST(x->rdmult, x->rddiv, rdc_skip.rate, rdc_skip.dist); + if (rdc_skip.rdcost < best_rd) { + best_rd = rdc_skip.rdcost; + best_mbmi = *mbmi; + best_skip = x->skip; + best_rdcost = rdc_skip; + } } *mbmi = best_mbmi; *rd_cost = best_rdcost; @@ -9200,6 +8314,7 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, xd->mi[0]->mbmi.ref_frame[1] = NONE_FRAME; #if CONFIG_INTRABC xd->mi[0]->mbmi.use_intrabc = 0; + xd->mi[0]->mbmi.mv[0].as_int = 0; #endif // CONFIG_INTRABC const int64_t intra_yrd = @@ -9212,11 +8327,8 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, if (intra_yrd < best_rd) { max_uv_tx_size = uv_txsize_lookup[bsize][xd->mi[0]->mbmi.tx_size] [pd[1].subsampling_x][pd[1].subsampling_y]; - + init_sbuv_mode(&xd->mi[0]->mbmi); #if CONFIG_CB4X4 -#if !CONFIG_CHROMA_2X2 - max_uv_tx_size = AOMMAX(max_uv_tx_size, TX_4X4); -#endif // !CONFIG_CHROMA_2X2 if (!x->skip_chroma_rd) rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv, &uv_skip, bsize, max_uv_tx_size); @@ -9235,6 +8347,9 @@ void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, rd_cost->dist = dist_y + dist_uv; } rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist); +#if CONFIG_DAALA_DIST && CONFIG_CB4X4 + rd_cost->dist_y = dist_y; +#endif } else { rd_cost->rate = INT_MAX; } @@ -9602,10 +8717,8 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, int64_t best_pred_diff[REFERENCE_MODES]; int64_t best_pred_rd[REFERENCE_MODES]; MB_MODE_INFO best_mbmode; -#if CONFIG_REF_MV int rate_skip0 = av1_cost_bit(av1_get_skip_prob(cm, xd), 0); int rate_skip1 = av1_cost_bit(av1_get_skip_prob(cm, xd), 1); -#endif // CONFIG_REF_MV int best_mode_skippable = 0; int midx, 
best_mode_index = -1; unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME]; @@ -9635,13 +8748,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, const int *const intra_mode_cost = cpi->mbmode_cost[size_group_lookup[bsize]]; int best_skip2 = 0; uint8_t ref_frame_skip_mask[2] = { 0 }; -#if CONFIG_EXT_INTER uint32_t mode_skip_mask[TOTAL_REFS_PER_FRAME] = { 0 }; +#if CONFIG_EXT_INTER && CONFIG_INTERINTRA MV_REFERENCE_FRAME best_single_inter_ref = LAST_FRAME; int64_t best_single_inter_rd = INT64_MAX; -#else - uint16_t mode_skip_mask[TOTAL_REFS_PER_FRAME] = { 0 }; -#endif // CONFIG_EXT_INTER +#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA int mode_skip_start = sf->mode_skip_start + 1; const int *const rd_threshes = rd_opt->threshes[segment_id][bsize]; const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize]; @@ -9663,8 +8774,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, NULL, NULL, NULL, - NULL, - NULL, #else // CONFIG_EXT_INTER NULL, #endif // CONFIG_EXT_INTER @@ -9681,15 +8790,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, const MODE_INFO *left_mi = xd->left_mi; #endif // CONFIG_PALETTE #if CONFIG_MOTION_VAR -#if CONFIG_HIGHBITDEPTH - DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * MAX_SB_SQUARE]); - DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * MAX_SB_SQUARE]); -#else - DECLARE_ALIGNED(16, uint8_t, tmp_buf1[MAX_MB_PLANE * MAX_SB_SQUARE]); - DECLARE_ALIGNED(16, uint8_t, tmp_buf2[MAX_MB_PLANE * MAX_SB_SQUARE]); -#endif // CONFIG_HIGHBITDEPTH - DECLARE_ALIGNED(16, int32_t, weighted_src_buf[MAX_SB_SQUARE]); - DECLARE_ALIGNED(16, int32_t, mask2d_buf[MAX_SB_SQUARE]); int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; @@ -9698,22 +8798,24 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, #if CONFIG_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { int len = sizeof(uint16_t); - args.above_pred_buf[0] = CONVERT_TO_BYTEPTR(tmp_buf1); - args.above_pred_buf[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * len); + args.above_pred_buf[0] = CONVERT_TO_BYTEPTR(x->above_pred_buf); + args.above_pred_buf[1] = + CONVERT_TO_BYTEPTR(x->above_pred_buf + MAX_SB_SQUARE * len); args.above_pred_buf[2] = - CONVERT_TO_BYTEPTR(tmp_buf1 + 2 * MAX_SB_SQUARE * len); - args.left_pred_buf[0] = CONVERT_TO_BYTEPTR(tmp_buf2); - args.left_pred_buf[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * len); + CONVERT_TO_BYTEPTR(x->above_pred_buf + 2 * MAX_SB_SQUARE * len); + args.left_pred_buf[0] = CONVERT_TO_BYTEPTR(x->left_pred_buf); + args.left_pred_buf[1] = + CONVERT_TO_BYTEPTR(x->left_pred_buf + MAX_SB_SQUARE * len); args.left_pred_buf[2] = - CONVERT_TO_BYTEPTR(tmp_buf2 + 2 * MAX_SB_SQUARE * len); + CONVERT_TO_BYTEPTR(x->left_pred_buf + 2 * MAX_SB_SQUARE * len); } else { #endif // CONFIG_HIGHBITDEPTH - args.above_pred_buf[0] = tmp_buf1; - args.above_pred_buf[1] = tmp_buf1 + MAX_SB_SQUARE; - args.above_pred_buf[2] = tmp_buf1 + 2 * MAX_SB_SQUARE; - args.left_pred_buf[0] = tmp_buf2; - args.left_pred_buf[1] = tmp_buf2 + MAX_SB_SQUARE; - args.left_pred_buf[2] = tmp_buf2 + 2 * MAX_SB_SQUARE; + args.above_pred_buf[0] = x->above_pred_buf; + args.above_pred_buf[1] = x->above_pred_buf + MAX_SB_SQUARE; + args.above_pred_buf[2] = x->above_pred_buf + 2 * MAX_SB_SQUARE; + args.left_pred_buf[0] = 
x->left_pred_buf; + args.left_pred_buf[1] = x->left_pred_buf + MAX_SB_SQUARE; + args.left_pred_buf[2] = x->left_pred_buf + 2 * MAX_SB_SQUARE; #if CONFIG_HIGHBITDEPTH } #endif // CONFIG_HIGHBITDEPTH @@ -9731,11 +8833,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, } #endif // CONFIG_PALETTE -#if CONFIG_EXT_INTRA - memset(directional_mode_skip_mask, 0, - sizeof(directional_mode_skip_mask[0]) * INTRA_MODES); -#endif // CONFIG_EXT_INTRA - estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp, &comp_mode_p); @@ -9756,9 +8853,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { x->pred_mv_sad[ref_frame] = INT_MAX; x->mbmi_ext->mode_context[ref_frame] = 0; -#if CONFIG_REF_MV && CONFIG_EXT_INTER +#if CONFIG_EXT_INTER x->mbmi_ext->compound_mode_context[ref_frame] = 0; -#endif // CONFIG_REF_MV && CONFIG_EXT_INTER +#endif // CONFIG_EXT_INTER if (cpi->ref_frame_flags & flag_list[ref_frame]) { assert(get_ref_frame_buffer(cpi, ref_frame) != NULL); setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col, @@ -9788,7 +8885,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, #endif // CONFIG_EXT_INTER } -#if CONFIG_REF_MV for (; ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) { MODE_INFO *const mi = xd->mi[0]; int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame]; @@ -9813,10 +8909,10 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, mbmi_ext->mode_context[ref_frame] &= ~(1 << ALL_ZERO_FLAG_OFFSET); } } -#endif // CONFIG_REF_MV #if CONFIG_MOTION_VAR av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col); + if (check_num_overlappable_neighbors(mbmi) && is_motion_variation_allowed_bsize(bsize)) { av1_build_prediction_by_above_preds(cm, xd, mi_row, mi_col, @@ -9827,8 +8923,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, dst_height2, args.left_pred_stride); av1_setup_dst_planes(xd->plane, bsize, get_frame_new_buffer(cm), mi_row, mi_col); - x->mask_buf = mask2d_buf; - x->wsrc_buf = weighted_src_buf; calc_target_weighted_pred(cm, x, xd, mi_row, mi_col, args.above_pred_buf[0], args.above_pred_stride[0], args.left_pred_buf[0], args.left_pred_stride[0]); @@ -9904,10 +8998,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, #if CONFIG_EXT_INTER if (frame_mv[NEAREST_NEARESTMV][ALTREF_FRAME].as_int != zeromv.as_int) mode_skip_mask[ALTREF_FRAME] |= (1 << NEAREST_NEARESTMV); - if (frame_mv[NEAREST_NEARMV][ALTREF_FRAME].as_int != zeromv.as_int) - mode_skip_mask[ALTREF_FRAME] |= (1 << NEAREST_NEARMV); - if (frame_mv[NEAR_NEARESTMV][ALTREF_FRAME].as_int != zeromv.as_int) - mode_skip_mask[ALTREF_FRAME] |= (1 << NEAR_NEARESTMV); if (frame_mv[NEAR_NEARMV][ALTREF_FRAME].as_int != zeromv.as_int) mode_skip_mask[ALTREF_FRAME] |= (1 << NEAR_NEARMV); #endif // CONFIG_EXT_INTER @@ -9931,7 +9021,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, if (sf->adaptive_mode_search) { if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref && cpi->rc.frames_since_golden >= 3) - if (x->pred_mv_sad[GOLDEN_FRAME] > (x->pred_mv_sad[LAST_FRAME] << 1)) + if ((x->pred_mv_sad[GOLDEN_FRAME] >> 1) > x->pred_mv_sad[LAST_FRAME]) mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL; } @@ -9985,18 +9075,16 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, int64_t this_rd = INT64_MAX; int disable_skip = 0; int compmode_cost = 0; -#if 
CONFIG_EXT_INTER - int compmode_interintra_cost = 0; - int compmode_interinter_cost = 0; -#endif // CONFIG_EXT_INTER int rate2 = 0, rate_y = 0, rate_uv = 0; int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0; +#if CONFIG_DAALA_DIST && CONFIG_CB4X4 + int64_t distortion2_y = 0; + int64_t total_sse_y = INT64_MAX; +#endif int skippable = 0; int this_skip2 = 0; int64_t total_sse = INT64_MAX; -#if CONFIG_REF_MV uint8_t ref_frame_type; -#endif // CONFIG_REF_MV #if CONFIG_PVQ od_encode_rollback(&x->daala_enc, &pre_buf); #endif // CONFIG_PVQ @@ -10004,9 +9092,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, this_mode = av1_mode_order[mode_index].mode; ref_frame = av1_mode_order[mode_index].ref_frame[0]; second_ref_frame = av1_mode_order[mode_index].ref_frame[1]; -#if CONFIG_REF_MV mbmi->ref_mv_idx = 0; -#endif // CONFIG_REF_MV #if CONFIG_EXT_INTER if (ref_frame > INTRA_FRAME && second_ref_frame == INTRA_FRAME) { @@ -10079,7 +9165,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, // This is only used in motion vector unit test. if (cpi->oxcf.motion_vector_unit_test && ref_frame == INTRA_FRAME) continue; -#if CONFIG_LOWDELAY_COMPOUND // Changes LL bitstream +#if CONFIG_ONE_SIDED_COMPOUND // Changes LL bitstream #if CONFIG_EXT_REFS if (cpi->oxcf.pass == 0) { // Complexity-compression trade-offs @@ -10144,9 +9230,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, #endif // CONFIG_GLOBAL_MOTION const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, second_ref_frame }; if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, -#if CONFIG_REF_MV && CONFIG_EXT_INTER +#if CONFIG_EXT_INTER mbmi_ext->compound_mode_context, -#endif // CONFIG_REF_MV && CONFIG_EXT_INTER +#endif // CONFIG_EXT_INTER frame_mv, this_mode, ref_frames, bsize, -1, mi_row, mi_col)) continue; @@ -10181,9 +9267,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i]; } -#if CONFIG_EXT_INTER +#if CONFIG_EXT_INTER && CONFIG_INTERINTRA mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1); -#endif // CONFIG_EXT_INTER +#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA if (ref_frame == INTRA_FRAME) { RD_STATS rd_stats_y; @@ -10199,11 +9285,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, const uint8_t *src = x->plane[0].src.buf; #if CONFIG_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) - highbd_angle_estimation(src, src_stride, rows, cols, + highbd_angle_estimation(src, src_stride, rows, cols, bsize, directional_mode_skip_mask); else #endif // CONFIG_HIGHBITDEPTH - angle_estimation(src, src_stride, rows, cols, + angle_estimation(src, src_stride, rows, cols, bsize, directional_mode_skip_mask); angle_stats_ready = 1; } @@ -10336,18 +9422,19 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, if (mbmi->mode != DC_PRED && mbmi->mode != TM_PRED) rate2 += intra_cost_penalty; distortion2 = distortion_y + distortion_uv; +#if CONFIG_DAALA_DIST && CONFIG_CB4X4 + if (bsize < BLOCK_8X8) distortion2_y = distortion_y; +#endif } else { -#if CONFIG_REF_MV int_mv backup_ref_mv[2]; #if !SUB8X8_COMP_REF - if (bsize < BLOCK_8X8 && mbmi->ref_frame[1] > INTRA_FRAME) continue; + if (bsize == BLOCK_4X4 && mbmi->ref_frame[1] > INTRA_FRAME) continue; #endif // !SUB8X8_COMP_REF backup_ref_mv[0] = mbmi_ext->ref_mvs[ref_frame][0]; if (comp_pred) backup_ref_mv[1] = mbmi_ext->ref_mvs[second_ref_frame][0]; -#endif 
// CONFIG_REF_MV -#if CONFIG_EXT_INTER +#if CONFIG_EXT_INTER && CONFIG_INTERINTRA if (second_ref_frame == INTRA_FRAME) { if (best_single_inter_ref != ref_frame) continue; mbmi->interintra_mode = intra_to_interintra_mode[best_intra_mode]; @@ -10365,8 +9452,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0; #endif // CONFIG_FILTER_INTRA } -#endif // CONFIG_EXT_INTER -#if CONFIG_REF_MV +#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA mbmi->ref_mv_idx = 0; ref_frame_type = av1_ref_frame_type(mbmi->ref_frame); @@ -10411,7 +9497,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, #if CONFIG_EXT_INTER } #endif // CONFIG_EXT_INTER -#endif // CONFIG_REF_MV { RD_STATS rd_stats, rd_stats_y, rd_stats_uv; av1_init_rd_stats(&rd_stats); @@ -10421,18 +9506,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, args.single_newmv = single_newmv; #if CONFIG_EXT_INTER args.single_newmv_rate = single_newmv_rate; - args.compmode_interintra_cost = &compmode_interintra_cost; - args.compmode_interinter_cost = &compmode_interinter_cost; args.modelled_rd = modelled_rd; #endif // CONFIG_EXT_INTER this_rd = handle_inter_mode(cpi, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, &disable_skip, frame_mv, mi_row, mi_col, &args, best_rd); -// Prevent pointers from escaping local scope -#if CONFIG_EXT_INTER - args.compmode_interintra_cost = NULL; - args.compmode_interinter_cost = NULL; -#endif // CONFIG_EXT_INTER rate2 = rd_stats.rate; skippable = rd_stats.skip; @@ -10440,9 +9518,11 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, total_sse = rd_stats.sse; rate_y = rd_stats_y.rate; rate_uv = rd_stats_uv.rate; +#if CONFIG_DAALA_DIST && CONFIG_CB4X4 + if (bsize < BLOCK_8X8) distortion2_y = rd_stats_y.dist; +#endif } -#if CONFIG_REF_MV // TODO(jingning): This needs some refactoring to improve code quality // and reduce redundant steps. 
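/*
 * Illustrative sketch (not part of the patch) of the extra reference-MV
 * (DRL index) refinement performed by the loop below: candidate 0 has
 * already been costed, and each additional entry in the ref-MV stack is
 * re-evaluated, its index signaling cost included, and adopted only if it
 * improves the best RD cost so far (the backup_mbmi / backup_skip
 * bookkeeping in the diff).  evaluate() is a hypothetical stand-in for the
 * handle_inter_mode() call.
 */
#include <stdint.h>

static int64_t sketch_pick_drl_index(int num_extra,
                                     int64_t (*evaluate)(int ref_mv_idx),
                                     int *best_idx) {
  int64_t best_rd = evaluate(0);
  *best_idx = 0;
  for (int idx = 1; idx <= num_extra; ++idx) {
    const int64_t rd = evaluate(idx);  /* includes the DRL index cost */
    if (rd < best_rd) {
      best_rd = rd;   /* keep this candidate; otherwise restore the backup */
      *best_idx = idx;
    }
  }
  return best_rd;
}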
#if CONFIG_EXT_INTER @@ -10505,10 +9585,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, int ref; int_mv cur_mv; RD_STATS tmp_rd_stats, tmp_rd_stats_y, tmp_rd_stats_uv; -#if CONFIG_EXT_INTER - int tmp_compmode_interintra_cost = 0; - int tmp_compmode_interinter_cost = 0; -#endif // CONFIG_EXT_INTER av1_invalid_rd_stats(&tmp_rd_stats); x->skip = 0; @@ -10586,8 +9662,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, args.single_newmv = dummy_single_newmv; #if CONFIG_EXT_INTER args.single_newmv_rate = dummy_single_newmv_rate; - args.compmode_interintra_cost = &tmp_compmode_interintra_cost; - args.compmode_interinter_cost = &tmp_compmode_interinter_cost; args.modelled_rd = NULL; #endif // CONFIG_EXT_INTER tmp_alt_rd = handle_inter_mode( @@ -10597,8 +9671,6 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, args.single_newmv = NULL; #if CONFIG_EXT_INTER args.single_newmv_rate = NULL; - args.compmode_interintra_cost = NULL; - args.compmode_interinter_cost = NULL; #endif // CONFIG_EXT_INTER } @@ -10658,15 +9730,17 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, tmp_ref_rd = tmp_alt_rd; backup_mbmi = *mbmi; backup_skip = x->skip; +#if CONFIG_DAALA_DIST && CONFIG_CB4X4 + if (bsize < BLOCK_8X8) { + total_sse_y = tmp_rd_stats_y.sse; + distortion2_y = tmp_rd_stats_y.dist; + } +#endif #if CONFIG_VAR_TX for (i = 0; i < MAX_MB_PLANE; ++i) memcpy(x->blk_skip_drl[i], x->blk_skip[i], sizeof(uint8_t) * ctx->num_4x4_blk); #endif // CONFIG_VAR_TX -#if CONFIG_EXT_INTER - compmode_interintra_cost = tmp_compmode_interintra_cost; - compmode_interinter_cost = tmp_compmode_interinter_cost; -#endif // CONFIG_EXT_INTER } else { *mbmi = backup_mbmi; x->skip = backup_skip; @@ -10684,29 +9758,19 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, } mbmi_ext->ref_mvs[ref_frame][0] = backup_ref_mv[0]; if (comp_pred) mbmi_ext->ref_mvs[second_ref_frame][0] = backup_ref_mv[1]; -#endif // CONFIG_REF_MV if (this_rd == INT64_MAX) continue; #if SUB8X8_COMP_REF compmode_cost = av1_cost_bit(comp_mode_p, comp_pred); #else - if (mbmi->sb_type >= BLOCK_8X8) + if (mbmi->sb_type != BLOCK_4X4) compmode_cost = av1_cost_bit(comp_mode_p, comp_pred); #endif // SUB8X8_COMP_REF if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += compmode_cost; } -#if CONFIG_EXT_INTER - rate2 += compmode_interintra_cost; - if (cm->reference_mode != SINGLE_REFERENCE && comp_pred) -#if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION - if (mbmi->motion_mode == SIMPLE_TRANSLATION) -#endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION - rate2 += compmode_interinter_cost; -#endif // CONFIG_EXT_INTER - // Estimate the reference frame signaling cost and add it // to the rolling cost variable. if (comp_pred) { @@ -10731,14 +9795,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, // Cost the skip mb case rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 1); } else if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) { -#if CONFIG_REF_MV if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv + rate_skip0, distortion2) < RDCOST(x->rdmult, x->rddiv, rate_skip1, total_sse)) { -#else - if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) < - RDCOST(x->rdmult, x->rddiv, 0, total_sse)) { -#endif // CONFIG_REF_MV // Add in the cost of the no skip flag. 
rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0); } else { @@ -10750,6 +9809,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, this_skip2 = 1; rate_y = 0; rate_uv = 0; +#if CONFIG_DAALA_DIST && CONFIG_CB4X4 + if (bsize < BLOCK_8X8) distortion2_y = total_sse_y; +#endif } } else { // Add in the cost of the no skip flag. @@ -10775,13 +9837,13 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, best_intra_rd = this_rd; best_intra_mode = mbmi->mode; } -#if CONFIG_EXT_INTER +#if CONFIG_EXT_INTER && CONFIG_INTERINTRA } else if (second_ref_frame == NONE_FRAME) { if (this_rd < best_single_inter_rd) { best_single_inter_rd = this_rd; best_single_inter_ref = mbmi->ref_frame[0]; } -#endif // CONFIG_EXT_INTER +#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA } if (!disable_skip && ref_frame == INTRA_FRAME) { @@ -10839,7 +9901,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, best_rate_y = rate_y + av1_cost_bit(av1_get_skip_prob(cm, xd), this_skip2 || skippable); best_rate_uv = rate_uv; - +#if CONFIG_DAALA_DIST && CONFIG_CB4X4 + if (bsize < BLOCK_8X8) rd_cost->dist_y = distortion2_y; +#endif #if CONFIG_VAR_TX for (i = 0; i < MAX_MB_PLANE; ++i) memcpy(ctx->blk_skip[i], x->blk_skip[i], @@ -10900,7 +9964,7 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, } if (is_inter_mode(mbmi->mode)) { - av1_build_inter_predictors_sb(xd, mi_row, mi_col, NULL, bsize); + av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize); #if CONFIG_MOTION_VAR if (mbmi->motion_mode == OBMC_CAUSAL) { av1_build_obmc_inter_prediction( @@ -10967,6 +10031,9 @@ void av1_rd_pick_inter_mode_sb(const AV1_COMP *cpi, TileDataEnc *tile_data, rd_cost->rate += (rd_stats_y.rate + rd_stats_uv.rate - best_rate_y - best_rate_uv); rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist; +#if CONFIG_DAALA_DIST && CONFIG_CB4X4 + if (bsize < BLOCK_8X8) rd_cost->dist_y = rd_stats_y.dist; +#endif rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist); best_skip2 = skip_blk; @@ -11111,9 +10178,7 @@ PALETTE_EXIT: best_mbmode.ref_frame[1] }; int comp_pred_mode = refs[1] > INTRA_FRAME; int_mv zeromv[2]; -#if CONFIG_REF_MV const uint8_t rf_type = av1_ref_frame_type(best_mbmode.ref_frame); -#endif // CONFIG_REF_MV #if CONFIG_GLOBAL_MOTION zeromv[0].as_int = gm_get_motion_vector(&cm->global_motion[refs[0]], cm->allow_high_precision_mv, bsize, @@ -11129,7 +10194,6 @@ PALETTE_EXIT: zeromv[0].as_int = 0; zeromv[1].as_int = 0; #endif // CONFIG_GLOBAL_MOTION -#if CONFIG_REF_MV if (!comp_pred_mode) { int ref_set = (mbmi_ext->ref_mv_count[rf_type] >= 2) ? 
AOMMIN(2, mbmi_ext->ref_mv_count[rf_type] - 2) @@ -11196,17 +10260,9 @@ PALETTE_EXIT: nearmv[0] = mbmi_ext->ref_mv_stack[rf_type][i + 1].this_mv; nearmv[1] = mbmi_ext->ref_mv_stack[rf_type][i + 1].comp_mv; - // Try switching to the NEAR_NEAREST type modes first - if (nearestmv[0].as_int == best_mbmode.mv[0].as_int && + // Try switching to the NEAR_NEARMV mode + if (nearmv[0].as_int == best_mbmode.mv[0].as_int && nearmv[1].as_int == best_mbmode.mv[1].as_int) { - best_mbmode.mode = NEAREST_NEARMV; - best_mbmode.ref_mv_idx = i; - } else if (nearmv[0].as_int == best_mbmode.mv[0].as_int && - nearestmv[1].as_int == best_mbmode.mv[1].as_int) { - best_mbmode.mode = NEAR_NEARESTMV; - best_mbmode.ref_mv_idx = i; - } else if (nearmv[0].as_int == best_mbmode.mv[0].as_int && - nearmv[1].as_int == best_mbmode.mv[1].as_int) { best_mbmode.mode = NEAR_NEARMV; best_mbmode.ref_mv_idx = i; } @@ -11225,72 +10281,8 @@ PALETTE_EXIT: } #endif // CONFIG_EXT_INTER } -#else -#if CONFIG_EXT_INTER - if (!comp_pred_mode) { -#endif // CONFIG_EXT_INTER - if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int && - ((comp_pred_mode && - frame_mv[NEARESTMV][refs[1]].as_int == best_mbmode.mv[1].as_int) || - !comp_pred_mode)) - best_mbmode.mode = NEARESTMV; - else if (frame_mv[NEARMV][refs[0]].as_int == best_mbmode.mv[0].as_int && - ((comp_pred_mode && - frame_mv[NEARMV][refs[1]].as_int == - best_mbmode.mv[1].as_int) || - !comp_pred_mode)) - best_mbmode.mode = NEARMV; - else if (best_mbmode.mv[0].as_int == zeromv[0].as_int && - ((comp_pred_mode && - best_mbmode.mv[1].as_int == zeromv[1].as_int) || - !comp_pred_mode)) - best_mbmode.mode = ZEROMV; -#if CONFIG_EXT_INTER - } else { -#if CONFIG_GLOBAL_MOTION - zeromv[0].as_int = gm_get_motion_vector(&cm->global_motion[refs[0]], - cm->allow_high_precision_mv, - bsize, mi_col, mi_row, 0) - .as_int; - zeromv[1].as_int = comp_pred_mode - ? 
gm_get_motion_vector(&cm->global_motion[refs[1]], - cm->allow_high_precision_mv, - bsize, mi_col, mi_row, 0) - .as_int - : 0; -#else - zeromv[0].as_int = 0; - zeromv[1].as_int = 0; -#endif // CONFIG_GLOBAL_MOTION - if (frame_mv[NEAREST_NEARESTMV][refs[0]].as_int == - best_mbmode.mv[0].as_int && - frame_mv[NEAREST_NEARESTMV][refs[1]].as_int == - best_mbmode.mv[1].as_int) - best_mbmode.mode = NEAREST_NEARESTMV; - else if (frame_mv[NEAREST_NEARMV][refs[0]].as_int == - best_mbmode.mv[0].as_int && - frame_mv[NEAREST_NEARMV][refs[1]].as_int == - best_mbmode.mv[1].as_int) - best_mbmode.mode = NEAREST_NEARMV; - else if (frame_mv[NEAR_NEARESTMV][refs[0]].as_int == - best_mbmode.mv[0].as_int && - frame_mv[NEAR_NEARESTMV][refs[1]].as_int == - best_mbmode.mv[1].as_int) - best_mbmode.mode = NEAR_NEARESTMV; - else if (frame_mv[NEAR_NEARMV][refs[0]].as_int == - best_mbmode.mv[0].as_int && - frame_mv[NEAR_NEARMV][refs[1]].as_int == - best_mbmode.mv[1].as_int) - best_mbmode.mode = NEAR_NEARMV; - else if (best_mbmode.mv[0].as_int == zeromv[0].as_int && - best_mbmode.mv[1].as_int == zeromv[1].as_int) - best_mbmode.mode = ZERO_ZEROMV; - } -#endif // CONFIG_EXT_INTER -#endif // CONFIG_REF_MV } -#if CONFIG_REF_MV // Make sure that the ref_mv_idx is only nonzero when we're // using a mode which can support ref_mv_idx if (best_mbmode.ref_mv_idx != 0 && @@ -11339,7 +10331,6 @@ PALETTE_EXIT: } } } -#endif // CONFIG_REF_MV if (best_mode_index < 0 || best_rd >= best_rd_so_far) { rd_cost->rate = INT_MAX; @@ -11412,14 +10403,12 @@ PALETTE_EXIT: } #endif // CONFIG_GLOBAL_MOTION -#if CONFIG_REF_MV for (i = 0; i < 1 + has_second_ref(mbmi); ++i) { if (mbmi->mode != NEWMV) mbmi->pred_mv[i].as_int = mbmi->mv[i].as_int; else mbmi->pred_mv[i].as_int = mbmi_ext->ref_mvs[mbmi->ref_frame[i]][0].as_int; } -#endif // CONFIG_REF_MV for (i = 0; i < REFERENCE_MODES; ++i) { if (best_pred_rd[i] == INT64_MAX) @@ -11502,10 +10491,8 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi, mbmi->tx_size = max_txsize_lookup[bsize]; x->skip = 1; -#if CONFIG_REF_MV mbmi->ref_mv_idx = 0; mbmi->pred_mv[0].as_int = 0; -#endif // CONFIG_REF_MV mbmi->motion_mode = SIMPLE_TRANSLATION; #if CONFIG_MOTION_VAR @@ -11566,7 +10553,9 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi, rd_cost->rate = rate2; rd_cost->dist = distortion2; rd_cost->rdcost = this_rd; - +#if CONFIG_DAALA_DIST && CONFIG_CB4X4 + if (bsize < BLOCK_8X8) rd_cost->dist_y = distortion2; +#endif if (this_rd >= best_rd_so_far) { rd_cost->rate = INT_MAX; rd_cost->rdcost = INT64_MAX; @@ -11589,791 +10578,6 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi, store_coding_context(x, ctx, THR_ZEROMV, best_pred_diff, 0); } -void av1_rd_pick_inter_mode_sub8x8(const struct AV1_COMP *cpi, - TileDataEnc *tile_data, struct macroblock *x, - int mi_row, int mi_col, - struct RD_STATS *rd_cost, -#if CONFIG_SUPERTX - int *returnrate_nocoef, -#endif // CONFIG_SUPERTX - BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, - int64_t best_rd_so_far) { - const AV1_COMMON *const cm = &cpi->common; - const RD_OPT *const rd_opt = &cpi->rd; - const SPEED_FEATURES *const sf = &cpi->sf; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; - const struct segmentation *const seg = &cm->seg; - MV_REFERENCE_FRAME ref_frame, second_ref_frame; - unsigned char segment_id = mbmi->segment_id; - int comp_pred, i; - int_mv frame_mv[MB_MODE_COUNT][TOTAL_REFS_PER_FRAME]; - struct buf_2d yv12_mb[TOTAL_REFS_PER_FRAME][MAX_MB_PLANE]; - static const int 
flag_list[TOTAL_REFS_PER_FRAME] = { - 0, - AOM_LAST_FLAG, -#if CONFIG_EXT_REFS - AOM_LAST2_FLAG, - AOM_LAST3_FLAG, -#endif // CONFIG_EXT_REFS - AOM_GOLD_FLAG, -#if CONFIG_EXT_REFS - AOM_BWD_FLAG, -#endif // CONFIG_EXT_REFS - AOM_ALT_FLAG - }; - int64_t best_rd = best_rd_so_far; - int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise - int64_t best_pred_diff[REFERENCE_MODES]; - int64_t best_pred_rd[REFERENCE_MODES]; - MB_MODE_INFO best_mbmode; - int ref_index, best_ref_index = 0; - unsigned int ref_costs_single[TOTAL_REFS_PER_FRAME]; - unsigned int ref_costs_comp[TOTAL_REFS_PER_FRAME]; - aom_prob comp_mode_p; -#if CONFIG_DUAL_FILTER - InterpFilter tmp_best_filter[4] = { 0 }; -#else - InterpFilter tmp_best_filter = SWITCHABLE; -#endif // CONFIG_DUAL_FILTER - int rate_uv_intra, rate_uv_tokenonly = INT_MAX; - int64_t dist_uv = INT64_MAX; - int skip_uv; - PREDICTION_MODE mode_uv = DC_PRED; - const int intra_cost_penalty = av1_get_intra_cost_penalty( - cm->base_qindex, cm->y_dc_delta_q, cm->bit_depth); - int_mv seg_mvs[4][TOTAL_REFS_PER_FRAME]; - b_mode_info best_bmodes[4]; - int best_skip2 = 0; - int ref_frame_skip_mask[2] = { 0 }; - int internal_active_edge = - av1_active_edge_sb(cpi, mi_row, mi_col) && av1_internal_image_edge(cpi); -#if CONFIG_PVQ - od_rollback_buffer pre_buf; - - od_encode_checkpoint(&x->daala_enc, &pre_buf); -#endif // CONFIG_PVQ - -#if CONFIG_SUPERTX - best_rd_so_far = INT64_MAX; - best_rd = best_rd_so_far; - best_yrd = best_rd_so_far; -#endif // CONFIG_SUPERTX - av1_zero(best_mbmode); - -#if CONFIG_FILTER_INTRA - mbmi->filter_intra_mode_info.use_filter_intra_mode[0] = 0; - mbmi->filter_intra_mode_info.use_filter_intra_mode[1] = 0; -#endif // CONFIG_FILTER_INTRA - mbmi->motion_mode = SIMPLE_TRANSLATION; -#if CONFIG_EXT_INTER - mbmi->interinter_compound_type = COMPOUND_AVERAGE; - mbmi->use_wedge_interintra = 0; -#endif // CONFIG_EXT_INTER -#if CONFIG_WARPED_MOTION - mbmi->num_proj_ref[0] = 0; - mbmi->num_proj_ref[1] = 0; -#endif // CONFIG_WARPED_MOTION - - for (i = 0; i < 4; i++) { - int j; - for (j = 0; j < TOTAL_REFS_PER_FRAME; j++) - seg_mvs[i][j].as_int = INVALID_MV; - } - - estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp, - &comp_mode_p); - - for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX; - rate_uv_intra = INT_MAX; - - rd_cost->rate = INT_MAX; -#if CONFIG_SUPERTX - *returnrate_nocoef = INT_MAX; -#endif // CONFIG_SUPERTX - - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { - x->mbmi_ext->mode_context[ref_frame] = 0; -#if CONFIG_REF_MV && CONFIG_EXT_INTER - x->mbmi_ext->compound_mode_context[ref_frame] = 0; -#endif // CONFIG_REF_MV && CONFIG_EXT_INTER - if (cpi->ref_frame_flags & flag_list[ref_frame]) { - setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col, - frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); - } else { - ref_frame_skip_mask[0] |= (1 << ref_frame); - ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; - } - frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; -#if CONFIG_EXT_INTER -#endif // CONFIG_EXT_INTER - frame_mv[ZEROMV][ref_frame].as_int = 0; - } - -#if CONFIG_PALETTE - mbmi->palette_mode_info.palette_size[0] = 0; - mbmi->palette_mode_info.palette_size[1] = 0; -#endif // CONFIG_PALETTE - - for (ref_index = 0; ref_index < MAX_REFS; ++ref_index) { - int mode_excluded = 0; - int64_t this_rd = INT64_MAX; - int disable_skip = 0; - int compmode_cost = 0; - int rate2 = 0, rate_y = 0, rate_uv = 0; - int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0; - int 
skippable = 0; - int this_skip2 = 0; - int64_t total_sse = INT_MAX; - -#if CONFIG_PVQ - od_encode_rollback(&x->daala_enc, &pre_buf); -#endif // CONFIG_PVQ - - ref_frame = av1_ref_order[ref_index].ref_frame[0]; - second_ref_frame = av1_ref_order[ref_index].ref_frame[1]; - -#if CONFIG_REF_MV - mbmi->ref_mv_idx = 0; -#endif // CONFIG_REF_MV - - // Look at the reference frame of the best mode so far and set the - // skip mask to look at a subset of the remaining modes. - if (ref_index > 2 && sf->mode_skip_start < MAX_MODES) { - if (ref_index == 3) { - switch (best_mbmode.ref_frame[0]) { - case INTRA_FRAME: break; - case LAST_FRAME: - ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | -#if CONFIG_EXT_REFS - (1 << LAST2_FRAME) | (1 << LAST3_FRAME) | - (1 << BWDREF_FRAME) | -#endif // CONFIG_EXT_REFS - (1 << ALTREF_FRAME); - ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; - break; -#if CONFIG_EXT_REFS - case LAST2_FRAME: - ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | (1 << LAST3_FRAME) | - (1 << GOLDEN_FRAME) | - (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME); - ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; - break; - case LAST3_FRAME: - ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | (1 << LAST2_FRAME) | - (1 << GOLDEN_FRAME) | - (1 << BWDREF_FRAME) | (1 << ALTREF_FRAME); - ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; - break; -#endif // CONFIG_EXT_REFS - case GOLDEN_FRAME: - ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | -#if CONFIG_EXT_REFS - (1 << LAST2_FRAME) | (1 << LAST3_FRAME) | - (1 << BWDREF_FRAME) | -#endif // CONFIG_EXT_REFS - (1 << ALTREF_FRAME); - ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; - break; -#if CONFIG_EXT_REFS - case BWDREF_FRAME: - ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | (1 << LAST2_FRAME) | - (1 << LAST3_FRAME) | (1 << GOLDEN_FRAME) | - (1 << ALTREF_FRAME); - ref_frame_skip_mask[1] |= (1 << ALTREF_FRAME) | 0x01; - break; -#endif // CONFIG_EXT_REFS - case ALTREF_FRAME: - ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | -#if CONFIG_EXT_REFS - (1 << LAST2_FRAME) | (1 << LAST3_FRAME) | - (1 << BWDREF_FRAME) | -#endif // CONFIG_EXT_REFS - (1 << GOLDEN_FRAME); -#if CONFIG_EXT_REFS - ref_frame_skip_mask[1] |= (1 << BWDREF_FRAME) | 0x01; -#endif // CONFIG_EXT_REFS - break; - case NONE_FRAME: - case TOTAL_REFS_PER_FRAME: - assert(0 && "Invalid Reference frame"); - break; - } - } - } - - if ((ref_frame_skip_mask[0] & (1 << ref_frame)) && - (ref_frame_skip_mask[1] & (1 << AOMMAX(0, second_ref_frame)))) - continue; - - // Test best rd so far against threshold for trying this mode. - if (!internal_active_edge && - rd_less_than_thresh(best_rd, - rd_opt->threshes[segment_id][bsize][ref_index], - tile_data->thresh_freq_fact[bsize][ref_index])) - continue; - - // This is only used in motion vector unit test. - if (cpi->oxcf.motion_vector_unit_test && ref_frame == INTRA_FRAME) continue; - -#if CONFIG_LOWDELAY_COMPOUND // Changes LL bitstream -#if CONFIG_EXT_REFS - if (cpi->oxcf.pass == 0) { - // Complexity-compression trade-offs - // if (ref_frame == ALTREF_FRAME) continue; - // if (ref_frame == BWDREF_FRAME) continue; - if (second_ref_frame == ALTREF_FRAME) continue; - // if (second_ref_frame == BWDREF_FRAME) continue; - } -#endif -#endif - comp_pred = second_ref_frame > INTRA_FRAME; - if (comp_pred) { - if (!cpi->allow_comp_inter_inter) continue; - if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue; - // Do not allow compound prediction if the segment level reference frame - // feature is in use as in this case there can only be one reference. 
- if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) continue; - - if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) && - best_mbmode.ref_frame[0] == INTRA_FRAME) - continue; - } - - // TODO(jingning, jkoleszar): scaling reference frame not supported for - // sub8x8 blocks. - if (ref_frame > INTRA_FRAME && - av1_is_scaled(&cm->frame_refs[ref_frame - 1].sf)) - continue; - - if (second_ref_frame > INTRA_FRAME && - av1_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf)) - continue; - - if (comp_pred) - mode_excluded = cm->reference_mode == SINGLE_REFERENCE; - else if (ref_frame != INTRA_FRAME) - mode_excluded = cm->reference_mode == COMPOUND_REFERENCE; - - // If the segment reference frame feature is enabled.... - // then do nothing if the current ref frame is not allowed.. - if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) && - get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) { - continue; - // Disable this drop out case if the ref frame - // segment level feature is enabled for this segment. This is to - // prevent the possibility that we end up unable to pick any mode. - } else if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) { - // Only consider ZEROMV/ALTREF_FRAME for alt ref frame, - // unless ARNR filtering is enabled in which case we want - // an unfiltered alternative. We allow near/nearest as well - // because they may result in zero-zero MVs but be cheaper. - if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) - continue; - } - - mbmi->tx_size = TX_4X4; - mbmi->uv_mode = DC_PRED; - mbmi->ref_frame[0] = ref_frame; - mbmi->ref_frame[1] = second_ref_frame; -// Evaluate all sub-pel filters irrespective of whether we can use -// them for this frame. -#if CONFIG_DUAL_FILTER - for (i = 0; i < 4; ++i) - mbmi->interp_filter[i] = cm->interp_filter == SWITCHABLE - ? EIGHTTAP_REGULAR - : cm->interp_filter; -#else - mbmi->interp_filter = - cm->interp_filter == SWITCHABLE ? EIGHTTAP_REGULAR : cm->interp_filter; -#endif // CONFIG_DUAL_FILTER - x->skip = 0; - set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); - - // Select prediction reference frames. - for (i = 0; i < MAX_MB_PLANE; i++) { - xd->plane[i].pre[0] = yv12_mb[ref_frame][i]; - if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i]; - } - -#if CONFIG_VAR_TX - mbmi->inter_tx_size[0][0] = mbmi->tx_size; - mbmi->min_tx_size = get_min_tx_size(mbmi->tx_size); -#endif // CONFIG_VAR_TX - - if (ref_frame == INTRA_FRAME) { - int rate; - if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y, &distortion_y, - NULL, best_rd) >= best_rd) - continue; - rate2 += rate; - rate2 += intra_cost_penalty; - distortion2 += distortion_y; - - if (rate_uv_intra == INT_MAX) { - choose_intra_uv_mode(cpi, x, ctx, bsize, TX_4X4, &rate_uv_intra, - &rate_uv_tokenonly, &dist_uv, &skip_uv, &mode_uv); - } - rate2 += rate_uv_intra; - rate_uv = rate_uv_tokenonly; - distortion2 += dist_uv; - distortion_uv = dist_uv; - mbmi->uv_mode = mode_uv; - } else { - int rate; - int64_t distortion; - int64_t this_rd_thresh; - int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX; - int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX; - int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse; - int tmp_best_skippable = 0; - int switchable_filter_index; - int_mv *second_ref = - comp_pred ? &x->mbmi_ext->ref_mvs[second_ref_frame][0] : NULL; - b_mode_info tmp_best_bmodes[16]; // Should this be 4 ? 
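/*
 * Illustrative sketch (not part of the patch) of the switchable
 * interpolation-filter selection pattern used by the (removed) sub-8x8
 * search that follows: every filter combination is rate-distortion costed,
 * the rate of signaling the filter choice is added when the frame-level
 * filter is SWITCHABLE, and the cheapest total wins.  rd_for_filter() and
 * signaling_rd() are hypothetical stand-ins for
 * rd_pick_inter_best_sub8x8_mode() and av1_get_switchable_rate().
 */
#include <stdint.h>

static int sketch_pick_filter(int num_filters, int filter_is_switchable,
                              int64_t (*rd_for_filter)(int filter),
                              int64_t (*signaling_rd)(int filter)) {
  int best_filter = 0;
  int64_t best_rd = INT64_MAX;
  for (int f = 0; f < num_filters; ++f) {
    int64_t rd = rd_for_filter(f);
    if (rd == INT64_MAX) continue;   /* this filter could not be evaluated */
    if (filter_is_switchable) rd += signaling_rd(f); /* filter coding cost */
    if (rd < best_rd) {
      best_rd = rd;
      best_filter = f;
    }
  }
  return best_filter;
}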
- MB_MODE_INFO tmp_best_mbmode; -#if CONFIG_DUAL_FILTER - BEST_SEG_INFO bsi[DUAL_FILTER_SET_SIZE]; -#else - BEST_SEG_INFO bsi[SWITCHABLE_FILTERS]; -#endif // CONFIG_DUAL_FILTER - int pred_exists = 0; - int uv_skippable; -#if CONFIG_EXT_INTER - int_mv compound_seg_newmvs[4][2]; - for (i = 0; i < 4; i++) { - compound_seg_newmvs[i][0].as_int = INVALID_MV; - compound_seg_newmvs[i][1].as_int = INVALID_MV; - } -#endif // CONFIG_EXT_INTER - - this_rd_thresh = (ref_frame == LAST_FRAME) - ? rd_opt->threshes[segment_id][bsize][THR_LAST] - : rd_opt->threshes[segment_id][bsize][THR_ALTR]; -#if CONFIG_EXT_REFS - this_rd_thresh = (ref_frame == LAST2_FRAME) - ? rd_opt->threshes[segment_id][bsize][THR_LAST2] - : this_rd_thresh; - this_rd_thresh = (ref_frame == LAST3_FRAME) - ? rd_opt->threshes[segment_id][bsize][THR_LAST3] - : this_rd_thresh; - this_rd_thresh = (ref_frame == BWDREF_FRAME) - ? rd_opt->threshes[segment_id][bsize][THR_BWDR] - : this_rd_thresh; -#endif // CONFIG_EXT_REFS - this_rd_thresh = (ref_frame == GOLDEN_FRAME) - ? rd_opt->threshes[segment_id][bsize][THR_GOLD] - : this_rd_thresh; - - // TODO(any): Add search of the tx_type to improve rd performance at the - // expense of speed. - mbmi->tx_type = DCT_DCT; - - if (cm->interp_filter != BILINEAR) { -#if CONFIG_DUAL_FILTER - tmp_best_filter[0] = EIGHTTAP_REGULAR; - tmp_best_filter[1] = EIGHTTAP_REGULAR; - tmp_best_filter[2] = EIGHTTAP_REGULAR; - tmp_best_filter[3] = EIGHTTAP_REGULAR; -#else - tmp_best_filter = EIGHTTAP_REGULAR; -#endif // CONFIG_DUAL_FILTER - if (x->source_variance < sf->disable_filter_search_var_thresh) { -#if CONFIG_DUAL_FILTER - tmp_best_filter[0] = EIGHTTAP_REGULAR; -#else - tmp_best_filter = EIGHTTAP_REGULAR; -#endif // CONFIG_DUAL_FILTER - } else if (sf->adaptive_pred_interp_filter == 1 && - ctx->pred_interp_filter < SWITCHABLE) { -#if CONFIG_DUAL_FILTER - tmp_best_filter[0] = ctx->pred_interp_filter; -#else - tmp_best_filter = ctx->pred_interp_filter; -#endif // CONFIG_DUAL_FILTER - } else if (sf->adaptive_pred_interp_filter == 2) { -#if CONFIG_DUAL_FILTER - tmp_best_filter[0] = ctx->pred_interp_filter < SWITCHABLE - ? ctx->pred_interp_filter - : 0; -#else - tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE - ? 
ctx->pred_interp_filter - : 0; -#endif // CONFIG_DUAL_FILTER - } else { -#if CONFIG_DUAL_FILTER - const int filter_set_size = DUAL_FILTER_SET_SIZE; -#else - const int filter_set_size = SWITCHABLE_FILTERS; -#endif // CONFIG_DUAL_FILTER - for (switchable_filter_index = 0; - switchable_filter_index < filter_set_size; - ++switchable_filter_index) { - int newbest, rs; - int64_t rs_rd; - MB_MODE_INFO_EXT *mbmi_ext = x->mbmi_ext; -#if CONFIG_DUAL_FILTER - mbmi->interp_filter[0] = filter_sets[switchable_filter_index][0]; - mbmi->interp_filter[1] = filter_sets[switchable_filter_index][1]; - mbmi->interp_filter[2] = filter_sets[switchable_filter_index][0]; - mbmi->interp_filter[3] = filter_sets[switchable_filter_index][1]; -#else - mbmi->interp_filter = switchable_filter_index; -#endif // CONFIG_DUAL_FILTER - tmp_rd = rd_pick_inter_best_sub8x8_mode( - cpi, x, &mbmi_ext->ref_mvs[ref_frame][0], second_ref, best_yrd, - &rate, &rate_y, &distortion, &skippable, &total_sse, - (int)this_rd_thresh, seg_mvs, -#if CONFIG_EXT_INTER - compound_seg_newmvs, -#endif // CONFIG_EXT_INTER - bsi, switchable_filter_index, mi_row, mi_col); - if (tmp_rd == INT64_MAX) continue; - rs = av1_get_switchable_rate(cpi, xd); - rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); - if (cm->interp_filter == SWITCHABLE) tmp_rd += rs_rd; - - newbest = (tmp_rd < tmp_best_rd); - if (newbest) { -#if CONFIG_DUAL_FILTER - tmp_best_filter[0] = mbmi->interp_filter[0]; - tmp_best_filter[1] = mbmi->interp_filter[1]; - tmp_best_filter[2] = mbmi->interp_filter[2]; - tmp_best_filter[3] = mbmi->interp_filter[3]; -#else - tmp_best_filter = mbmi->interp_filter; -#endif // CONFIG_DUAL_FILTER - tmp_best_rd = tmp_rd; - } - if ((newbest && cm->interp_filter == SWITCHABLE) || - ( -#if CONFIG_DUAL_FILTER - mbmi->interp_filter[0] == cm->interp_filter -#else - mbmi->interp_filter == cm->interp_filter -#endif // CONFIG_DUAL_FILTER - && cm->interp_filter != SWITCHABLE)) { - tmp_best_rdu = tmp_rd; - tmp_best_rate = rate; - tmp_best_ratey = rate_y; - tmp_best_distortion = distortion; - tmp_best_sse = total_sse; - tmp_best_skippable = skippable; - tmp_best_mbmode = *mbmi; - for (i = 0; i < 4; i++) { - tmp_best_bmodes[i] = xd->mi[0]->bmi[i]; - } - pred_exists = 1; - } - } // switchable_filter_index loop - } - } - - if (tmp_best_rdu == INT64_MAX && pred_exists) continue; - -#if CONFIG_DUAL_FILTER - mbmi->interp_filter[0] = - (cm->interp_filter == SWITCHABLE ? tmp_best_filter[0] - : cm->interp_filter); - mbmi->interp_filter[1] = - (cm->interp_filter == SWITCHABLE ? tmp_best_filter[1] - : cm->interp_filter); - mbmi->interp_filter[2] = - (cm->interp_filter == SWITCHABLE ? tmp_best_filter[2] - : cm->interp_filter); - mbmi->interp_filter[3] = - (cm->interp_filter == SWITCHABLE ? tmp_best_filter[3] - : cm->interp_filter); -#else - mbmi->interp_filter = - (cm->interp_filter == SWITCHABLE ? 
tmp_best_filter - : cm->interp_filter); -#endif // CONFIG_DUAL_FILTER - - if (!pred_exists) { - // Handles the special case when a filter that is not in the - // switchable list (bilinear) is indicated at the frame level - tmp_rd = rd_pick_inter_best_sub8x8_mode( - cpi, x, &x->mbmi_ext->ref_mvs[ref_frame][0], second_ref, best_yrd, - &rate, &rate_y, &distortion, &skippable, &total_sse, - (int)this_rd_thresh, seg_mvs, -#if CONFIG_EXT_INTER - compound_seg_newmvs, -#endif // CONFIG_EXT_INTER - bsi, 0, mi_row, mi_col); - if (tmp_rd == INT64_MAX) continue; - } else { - total_sse = tmp_best_sse; - rate = tmp_best_rate; - rate_y = tmp_best_ratey; - distortion = tmp_best_distortion; - skippable = tmp_best_skippable; - *mbmi = tmp_best_mbmode; - for (i = 0; i < 4; i++) xd->mi[0]->bmi[i] = tmp_best_bmodes[i]; - } - // Add in the cost of the transform type - if (!xd->lossless[mbmi->segment_id]) { - int rate_tx_type = 0; -#if CONFIG_EXT_TX - if (get_ext_tx_types(mbmi->tx_size, bsize, 1, cm->reduced_tx_set_used) > - 1) { - const int eset = - get_ext_tx_set(mbmi->tx_size, bsize, 1, cm->reduced_tx_set_used); - rate_tx_type = - cpi->inter_tx_type_costs[eset][mbmi->tx_size][mbmi->tx_type]; - } -#else - if (mbmi->tx_size < TX_32X32) { - rate_tx_type = cpi->inter_tx_type_costs[mbmi->tx_size][mbmi->tx_type]; - } -#endif // CONFIG_EXT_TX - rate += rate_tx_type; - rate_y += rate_tx_type; - } - - rate2 += rate; - distortion2 += distortion; - - if (cm->interp_filter == SWITCHABLE) - rate2 += av1_get_switchable_rate(cpi, xd); - - if (!mode_excluded) - mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE - : cm->reference_mode == COMPOUND_REFERENCE; - - compmode_cost = av1_cost_bit(comp_mode_p, comp_pred); - - tmp_best_rdu = - best_rd - AOMMIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2), - RDCOST(x->rdmult, x->rddiv, 0, total_sse)); - - if (tmp_best_rdu > 0) { - // If even the 'Y' rd value of split is higher than best so far - // then dont bother looking at UV - int is_cost_valid_uv; - RD_STATS rd_stats_uv; - av1_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col, NULL, - BLOCK_8X8); -#if CONFIG_VAR_TX - is_cost_valid_uv = - inter_block_uvrd(cpi, x, &rd_stats_uv, BLOCK_8X8, tmp_best_rdu); -#else - is_cost_valid_uv = - super_block_uvrd(cpi, x, &rd_stats_uv, BLOCK_8X8, tmp_best_rdu); -#endif // CONFIG_VAR_TX - rate_uv = rd_stats_uv.rate; - distortion_uv = rd_stats_uv.dist; - uv_skippable = rd_stats_uv.skip; - uv_sse = rd_stats_uv.sse; - - if (!is_cost_valid_uv) continue; - rate2 += rate_uv; - distortion2 += distortion_uv; - skippable = skippable && uv_skippable; - total_sse += uv_sse; - } else { - continue; - } - } - - if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += compmode_cost; - - // Estimate the reference frame signaling cost and add it - // to the rolling cost variable. - if (second_ref_frame > INTRA_FRAME) { - rate2 += ref_costs_comp[ref_frame]; -#if CONFIG_EXT_REFS - rate2 += ref_costs_comp[second_ref_frame]; -#endif // CONFIG_EXT_REFS - } else { - rate2 += ref_costs_single[ref_frame]; - } - - if (!disable_skip) { - // Skip is never coded at the segment level for sub8x8 blocks and instead - // always coded in the bitstream at the mode info level. - - if (ref_frame != INTRA_FRAME && !xd->lossless[mbmi->segment_id]) { - if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv, distortion2) < - RDCOST(x->rdmult, x->rddiv, 0, total_sse)) { - // Add in the cost of the no skip flag. 
- rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0); - } else { - // FIXME(rbultje) make this work for splitmv also - rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 1); - distortion2 = total_sse; - assert(total_sse >= 0); - rate2 -= (rate_y + rate_uv); - rate_y = 0; - rate_uv = 0; - this_skip2 = 1; - } - } else { - // Add in the cost of the no skip flag. - rate2 += av1_cost_bit(av1_get_skip_prob(cm, xd), 0); - } - - // Calculate the final RD estimate for this mode. - this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); - } - - if (!disable_skip && ref_frame == INTRA_FRAME) { - for (i = 0; i < REFERENCE_MODES; ++i) - best_pred_rd[i] = AOMMIN(best_pred_rd[i], this_rd); - } - - // Did this mode help.. i.e. is it the new best mode - if (this_rd < best_rd || x->skip) { - if (!mode_excluded) { - // Note index of best mode so far - best_ref_index = ref_index; - - if (ref_frame == INTRA_FRAME) { - /* required for left and above block mv */ - mbmi->mv[0].as_int = 0; - } - - rd_cost->rate = rate2; -#if CONFIG_SUPERTX - *returnrate_nocoef = rate2 - rate_y - rate_uv; - if (!disable_skip) - *returnrate_nocoef -= - av1_cost_bit(av1_get_skip_prob(cm, xd), this_skip2); - *returnrate_nocoef -= av1_cost_bit(av1_get_intra_inter_prob(cm, xd), - mbmi->ref_frame[0] != INTRA_FRAME); - assert(*returnrate_nocoef > 0); -#endif // CONFIG_SUPERTX - rd_cost->dist = distortion2; - rd_cost->rdcost = this_rd; - best_rd = this_rd; - best_yrd = - best_rd - RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv); - best_mbmode = *mbmi; - best_skip2 = this_skip2; - -#if CONFIG_VAR_TX - for (i = 0; i < MAX_MB_PLANE; ++i) - memset(ctx->blk_skip[i], 0, sizeof(uint8_t) * ctx->num_4x4_blk); -#endif // CONFIG_VAR_TX - - for (i = 0; i < 4; i++) best_bmodes[i] = xd->mi[0]->bmi[i]; - } - } - - /* keep record of best compound/single-only prediction */ - if (!disable_skip && ref_frame != INTRA_FRAME) { - int64_t single_rd, hybrid_rd, single_rate, hybrid_rate; - - if (cm->reference_mode == REFERENCE_MODE_SELECT) { - single_rate = rate2 - compmode_cost; - hybrid_rate = rate2; - } else { - single_rate = rate2; - hybrid_rate = rate2 + compmode_cost; - } - - single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2); - hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2); - - if (!comp_pred && single_rd < best_pred_rd[SINGLE_REFERENCE]) - best_pred_rd[SINGLE_REFERENCE] = single_rd; - else if (comp_pred && single_rd < best_pred_rd[COMPOUND_REFERENCE]) - best_pred_rd[COMPOUND_REFERENCE] = single_rd; - - if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT]) - best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd; - } - - if (x->skip && !comp_pred) break; - } - - if (best_rd >= best_rd_so_far) { - rd_cost->rate = INT_MAX; - rd_cost->rdcost = INT64_MAX; -#if CONFIG_SUPERTX - *returnrate_nocoef = INT_MAX; -#endif // CONFIG_SUPERTX - return; - } - - if (best_rd == INT64_MAX) { - rd_cost->rate = INT_MAX; - rd_cost->dist = INT64_MAX; - rd_cost->rdcost = INT64_MAX; -#if CONFIG_SUPERTX - *returnrate_nocoef = INT_MAX; -#endif // CONFIG_SUPERTX - return; - } - -#if CONFIG_DUAL_FILTER - assert((cm->interp_filter == SWITCHABLE) || - (cm->interp_filter == best_mbmode.interp_filter[0]) || - !is_inter_block(&best_mbmode)); -#else - assert((cm->interp_filter == SWITCHABLE) || - (cm->interp_filter == best_mbmode.interp_filter) || - !is_inter_block(&best_mbmode)); -#endif // CONFIG_DUAL_FILTER - - av1_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact, - sf->adaptive_rd_thresh, bsize, best_ref_index); - - // macroblock modes - *mbmi = 
best_mbmode; -#if CONFIG_VAR_TX - mbmi->inter_tx_size[0][0] = mbmi->tx_size; -#endif // CONFIG_VAR_TX - - x->skip |= best_skip2; - if (!is_inter_block(&best_mbmode)) { - for (i = 0; i < 4; i++) xd->mi[0]->bmi[i].as_mode = best_bmodes[i].as_mode; - } else { - for (i = 0; i < 4; ++i) - memcpy(&xd->mi[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info)); - -#if CONFIG_REF_MV - mbmi->pred_mv[0].as_int = xd->mi[0]->bmi[3].pred_mv[0].as_int; - mbmi->pred_mv[1].as_int = xd->mi[0]->bmi[3].pred_mv[1].as_int; -#endif // CONFIG_REF_MV - mbmi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int; - mbmi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int; - } - -// Note: this section is needed since the mode may have been forced to ZEROMV -#if CONFIG_GLOBAL_MOTION - if (mbmi->mode == ZEROMV -#if CONFIG_EXT_INTER - || mbmi->mode == ZERO_ZEROMV -#endif // CONFIG_EXT_INTER - ) { - if (is_nontrans_global_motion(xd)) { -#if CONFIG_DUAL_FILTER - mbmi->interp_filter[0] = cm->interp_filter == SWITCHABLE - ? EIGHTTAP_REGULAR - : cm->interp_filter; - mbmi->interp_filter[1] = cm->interp_filter == SWITCHABLE - ? EIGHTTAP_REGULAR - : cm->interp_filter; -#else - mbmi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP_REGULAR - : cm->interp_filter; -#endif // CONFIG_DUAL_FILTER - } - } -#endif // CONFIG_GLOBAL_MOTION - - for (i = 0; i < REFERENCE_MODES; ++i) { - if (best_pred_rd[i] == INT64_MAX) - best_pred_diff[i] = INT_MIN; - else - best_pred_diff[i] = best_rd - best_pred_rd[i]; - } - - store_coding_context(x, ctx, best_ref_index, best_pred_diff, 0); -} - #if CONFIG_MOTION_VAR // This function has a structure similar to av1_build_obmc_inter_prediction // @@ -12454,9 +10658,14 @@ static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x, i = 0; do { // for each mi in the above row const int mi_col_offset = i; - const MB_MODE_INFO *const above_mbmi = + const MB_MODE_INFO *above_mbmi = &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi; - const BLOCK_SIZE a_bsize = above_mbmi->sb_type; +#if CONFIG_CHROMA_SUB8X8 + if (above_mbmi->sb_type < BLOCK_8X8) + above_mbmi = + &xd->mi[mi_col_offset + 1 + mi_row_offset * xd->mi_stride]->mbmi; +#endif + const BLOCK_SIZE a_bsize = AOMMAX(above_mbmi->sb_type, BLOCK_8X8); const int mi_step = AOMMIN(xd->n8_w, mi_size_wide[a_bsize]); const int neighbor_bw = mi_step * MI_SIZE; @@ -12528,9 +10737,15 @@ static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x, i = 0; do { // for each mi in the left column const int mi_row_offset = i; - const MB_MODE_INFO *const left_mbmi = + MB_MODE_INFO *left_mbmi = &xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride]->mbmi; - const BLOCK_SIZE l_bsize = left_mbmi->sb_type; + +#if CONFIG_CHROMA_SUB8X8 + if (left_mbmi->sb_type < BLOCK_8X8) + left_mbmi = + &xd->mi[mi_col_offset + (mi_row_offset + 1) * xd->mi_stride]->mbmi; +#endif + const BLOCK_SIZE l_bsize = AOMMAX(left_mbmi->sb_type, BLOCK_8X8); const int mi_step = AOMMIN(xd->n8_h, mi_size_high[l_bsize]); const int neighbor_bh = mi_step * MI_SIZE; @@ -12636,7 +10851,7 @@ void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x, av1_setup_dst_planes(x->e_mbd.plane, bsize, get_frame_new_buffer(&cpi->common), mi_row, mi_col); - av1_build_inter_predictors_sb(xd, mi_row, mi_col, NULL, bsize); + av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize); av1_subtract_plane(x, bsize, 0); super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX); diff --git a/third_party/aom/av1/encoder/rdopt.h b/third_party/aom/av1/encoder/rdopt.h 
index a7053b289..e5d778fe5 100644 --- a/third_party/aom/av1/encoder/rdopt.h +++ b/third_party/aom/av1/encoder/rdopt.h @@ -62,6 +62,12 @@ void av1_dist_block(const AV1_COMP *cpi, MACROBLOCK *x, int plane, TX_SIZE tx_size, int64_t *out_dist, int64_t *out_sse, OUTPUT_STATUS output_status); +#if CONFIG_DAALA_DIST +int64_t av1_daala_dist(const uint8_t *src, int src_stride, const uint8_t *dst, + int dst_stride, int bsw, int bsh, int qm, + int use_activity_masking, int qindex); +#endif + #if !CONFIG_PVQ || CONFIG_VAR_TX int av1_cost_coeffs(const AV1_COMP *const cpi, MACROBLOCK *x, int plane, int block, TX_SIZE tx_size, const SCAN_ORDER *scan_order, @@ -101,16 +107,6 @@ int av1_active_h_edge(const struct AV1_COMP *cpi, int mi_row, int mi_step); int av1_active_v_edge(const struct AV1_COMP *cpi, int mi_col, int mi_step); int av1_active_edge_sb(const struct AV1_COMP *cpi, int mi_row, int mi_col); -void av1_rd_pick_inter_mode_sub8x8(const struct AV1_COMP *cpi, - struct TileDataEnc *tile_data, - struct macroblock *x, int mi_row, int mi_col, - struct RD_STATS *rd_cost, -#if CONFIG_SUPERTX - int *returnrate_nocoef, -#endif // CONFIG_SUPERTX - BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, - int64_t best_rd_so_far); - #if CONFIG_MOTION_VAR && CONFIG_NCOBMC void av1_check_ncobmc_rd(const struct AV1_COMP *cpi, struct macroblock *x, int mi_row, int mi_col); diff --git a/third_party/aom/av1/encoder/speed_features.c b/third_party/aom/av1/encoder/speed_features.c index 20c96761b..e2275a54f 100644 --- a/third_party/aom/av1/encoder/speed_features.c +++ b/third_party/aom/av1/encoder/speed_features.c @@ -139,8 +139,10 @@ static void set_good_speed_feature_framesize_dependent(AV1_COMP *cpi, } } -static void set_good_speed_feature(AV1_COMP *cpi, AV1_COMMON *cm, - SPEED_FEATURES *sf, int speed) { +static void set_good_speed_features_framesize_independent(AV1_COMP *cpi, + SPEED_FEATURES *sf, + int speed) { + AV1_COMMON *const cm = &cpi->common; const int boosted = frame_is_boosted(cpi); if (speed >= 1) { @@ -205,6 +207,9 @@ static void set_good_speed_feature(AV1_COMP *cpi, AV1_COMMON *cm, #if CONFIG_EXT_TX sf->tx_type_search.prune_mode = PRUNE_TWO; #endif +#if CONFIG_GLOBAL_MOTION + sf->gm_search_type = GM_DISABLE_SEARCH; +#endif // CONFIG_GLOBAL_MOTION } if (speed >= 4) { @@ -286,6 +291,12 @@ static void set_good_speed_feature(AV1_COMP *cpi, AV1_COMMON *cm, sf->coeff_prob_appx_step = 4; sf->mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH; } + if (speed >= 8) { + sf->mv.search_method = FAST_DIAMOND; + sf->mv.fullpel_search_step_param = 10; + sf->mv.subpel_force_stop = 2; + sf->lpf_pick = LPF_PICK_MINIMAL_LPF; + } } void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi) { @@ -339,12 +350,13 @@ void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi) { } void av1_set_speed_features_framesize_independent(AV1_COMP *cpi) { - SPEED_FEATURES *const sf = &cpi->sf; AV1_COMMON *const cm = &cpi->common; + SPEED_FEATURES *const sf = &cpi->sf; MACROBLOCK *const x = &cpi->td.mb; const AV1EncoderConfig *const oxcf = &cpi->oxcf; int i; + (void)cm; // best quality defaults sf->frame_parameter_update = 1; sf->mv.search_method = NSTEP; @@ -418,13 +430,16 @@ void av1_set_speed_features_framesize_independent(AV1_COMP *cpi) { // Set this at the appropriate speed levels sf->use_transform_domain_distortion = 0; +#if CONFIG_GLOBAL_MOTION + sf->gm_search_type = GM_FULL_SEARCH; +#endif // CONFIG_GLOBAL_MOTION if (oxcf->mode == GOOD #if CONFIG_XIPHRC || oxcf->pass == 1 #endif ) - set_good_speed_feature(cpi, cm, sf, oxcf->speed); + 
set_good_speed_features_framesize_independent(cpi, sf, oxcf->speed); // sf->partition_search_breakout_dist_thr is set assuming max 64x64 // blocks. Normalise this if the blocks are bigger. diff --git a/third_party/aom/av1/encoder/speed_features.h b/third_party/aom/av1/encoder/speed_features.h index af54a1a9a..5710d77c7 100644 --- a/third_party/aom/av1/encoder/speed_features.h +++ b/third_party/aom/av1/encoder/speed_features.h @@ -24,6 +24,9 @@ enum { (1 << D207_PRED) | (1 << D63_PRED) | #if CONFIG_ALT_INTRA (1 << SMOOTH_PRED) | +#if CONFIG_SMOOTH_HV + (1 << SMOOTH_V_PRED) | (1 << SMOOTH_H_PRED) | +#endif // CONFIG_SMOOTH_HV #endif // CONFIG_ALT_INTRA (1 << TM_PRED), INTRA_DC = (1 << DC_PRED), @@ -36,37 +39,33 @@ enum { #if CONFIG_EXT_INTER enum { INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | (1 << NEWMV) | - (1 << NEAREST_NEARESTMV) | (1 << NEAR_NEARMV) | - (1 << NEAREST_NEARMV) | (1 << NEAR_NEARESTMV) | (1 << NEW_NEWMV) | + (1 << NEAREST_NEARESTMV) | (1 << NEAR_NEARMV) | (1 << NEW_NEWMV) | (1 << NEAREST_NEWMV) | (1 << NEAR_NEWMV) | (1 << NEW_NEARMV) | (1 << NEW_NEARESTMV) | (1 << ZERO_ZEROMV), INTER_NEAREST = (1 << NEARESTMV) | (1 << NEAREST_NEARESTMV) | - (1 << NEAREST_NEARMV) | (1 << NEAR_NEARESTMV) | (1 << NEW_NEARESTMV) | (1 << NEAREST_NEWMV), INTER_NEAREST_NEW = (1 << NEARESTMV) | (1 << NEWMV) | (1 << NEAREST_NEARESTMV) | (1 << NEW_NEWMV) | - (1 << NEAR_NEARESTMV) | (1 << NEAREST_NEARMV) | (1 << NEW_NEARESTMV) | (1 << NEAREST_NEWMV) | (1 << NEW_NEARMV) | (1 << NEAR_NEWMV), INTER_NEAREST_ZERO = (1 << NEARESTMV) | (1 << ZEROMV) | (1 << NEAREST_NEARESTMV) | (1 << ZERO_ZEROMV) | - (1 << NEAREST_NEARMV) | (1 << NEAR_NEARESTMV) | (1 << NEAREST_NEWMV) | (1 << NEW_NEARESTMV), - INTER_NEAREST_NEW_ZERO = - (1 << NEARESTMV) | (1 << ZEROMV) | (1 << NEWMV) | - (1 << NEAREST_NEARESTMV) | (1 << ZERO_ZEROMV) | (1 << NEW_NEWMV) | - (1 << NEAREST_NEARMV) | (1 << NEAR_NEARESTMV) | (1 << NEW_NEARESTMV) | - (1 << NEAREST_NEWMV) | (1 << NEW_NEARMV) | (1 << NEAR_NEWMV), - INTER_NEAREST_NEAR_NEW = - (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV) | - (1 << NEAREST_NEARESTMV) | (1 << NEW_NEWMV) | (1 << NEAREST_NEARMV) | - (1 << NEAR_NEARESTMV) | (1 << NEW_NEARESTMV) | (1 << NEAREST_NEWMV) | - (1 << NEW_NEARMV) | (1 << NEAR_NEWMV) | (1 << NEAR_NEARMV), - INTER_NEAREST_NEAR_ZERO = - (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | - (1 << NEAREST_NEARESTMV) | (1 << ZERO_ZEROMV) | (1 << NEAREST_NEARMV) | - (1 << NEAR_NEARESTMV) | (1 << NEAREST_NEWMV) | (1 << NEW_NEARESTMV) | - (1 << NEW_NEARMV) | (1 << NEAR_NEWMV) | (1 << NEAR_NEARMV), + INTER_NEAREST_NEW_ZERO = (1 << NEARESTMV) | (1 << ZEROMV) | (1 << NEWMV) | + (1 << NEAREST_NEARESTMV) | (1 << ZERO_ZEROMV) | + (1 << NEW_NEWMV) | (1 << NEW_NEARESTMV) | + (1 << NEAREST_NEWMV) | (1 << NEW_NEARMV) | + (1 << NEAR_NEWMV), + INTER_NEAREST_NEAR_NEW = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV) | + (1 << NEAREST_NEARESTMV) | (1 << NEW_NEWMV) | + (1 << NEW_NEARESTMV) | (1 << NEAREST_NEWMV) | + (1 << NEW_NEARMV) | (1 << NEAR_NEWMV) | + (1 << NEAR_NEARMV), + INTER_NEAREST_NEAR_ZERO = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | + (1 << NEAREST_NEARESTMV) | (1 << ZERO_ZEROMV) | + (1 << NEAREST_NEWMV) | (1 << NEW_NEARESTMV) | + (1 << NEW_NEARMV) | (1 << NEAR_NEWMV) | + (1 << NEAR_NEARMV), }; #else enum { @@ -196,14 +195,7 @@ typedef enum { // Always use a fixed size partition FIXED_PARTITION, - REFERENCE_PARTITION, - - // Use an arbitrary partitioning scheme based on source variance within - // a 64X64 SB - VAR_BASED_PARTITION, - - // Use 
non-fixed partitions based on source variance - SOURCE_VAR_BASED_PARTITION + REFERENCE_PARTITION } PARTITION_SEARCH_TYPE; typedef enum { @@ -251,6 +243,14 @@ typedef struct MESH_PATTERN { int interval; } MESH_PATTERN; +#if CONFIG_GLOBAL_MOTION +typedef enum { + GM_FULL_SEARCH, + GM_REDUCED_REF_SEARCH, + GM_DISABLE_SEARCH +} GM_SEARCH_TYPE; +#endif // CONFIG_GLOBAL_MOTION + typedef struct SPEED_FEATURES { MV_SPEED_FEATURES mv; @@ -432,7 +432,7 @@ typedef struct SPEED_FEATURES { // TODO(aconverse): Fold this into one of the other many mode skips BLOCK_SIZE max_intra_bsize; - // The frequency that we check if SOURCE_VAR_BASED_PARTITION or + // The frequency that we check if // FIXED_PARTITION search type should be used. int search_type_check_frequency; @@ -470,6 +470,10 @@ typedef struct SPEED_FEATURES { // Whether to compute distortion in the image domain (slower but // more accurate), or in the transform domain (faster but less acurate). int use_transform_domain_distortion; + +#if CONFIG_GLOBAL_MOTION + GM_SEARCH_TYPE gm_search_type; +#endif // CONFIG_GLOBAL_MOTION } SPEED_FEATURES; struct AV1_COMP; diff --git a/third_party/aom/av1/encoder/subexp.c b/third_party/aom/av1/encoder/subexp.c index 8960d3341..6a8ba12d8 100644 --- a/third_party/aom/av1/encoder/subexp.c +++ b/third_party/aom/av1/encoder/subexp.c @@ -179,83 +179,6 @@ int av1_prob_diff_update_savings_search_model(const unsigned int *ct, return bestsavings; } -#if CONFIG_SUBFRAME_PROB_UPDATE -static int get_cost(unsigned int ct[][2], aom_prob p, int n) { - int i, p0 = p; - unsigned int total_ct[2] = { 0, 0 }; - int cost = 0; - - for (i = 0; i <= n; ++i) { - cost += cost_branch256(ct[i], p); - total_ct[0] += ct[i][0]; - total_ct[1] += ct[i][1]; - if (i < n) - p = av1_merge_probs(p0, total_ct, COEF_COUNT_SAT, COEF_MAX_UPDATE_FACTOR); - } - return cost; -} - -int av1_prob_update_search_subframe(unsigned int ct[][2], aom_prob oldp, - aom_prob *bestp, aom_prob upd, int n) { - const int old_b = get_cost(ct, oldp, n); - int bestsavings = 0; - const int upd_cost = av1_cost_one(upd) - av1_cost_zero(upd); - aom_prob newp, bestnewp = oldp; - const int step = *bestp > oldp ? -1 : 1; - - for (newp = *bestp; newp != oldp; newp += step) { - const int new_b = get_cost(ct, newp, n); - const int update_b = prob_diff_update_cost(newp, oldp) + upd_cost; - const int savings = old_b - new_b - update_b; - if (savings > bestsavings) { - bestsavings = savings; - bestnewp = newp; - } - } - *bestp = bestnewp; - return bestsavings; -} - -int av1_prob_update_search_model_subframe( - unsigned int ct[ENTROPY_NODES][COEF_PROBS_BUFS][2], const aom_prob *oldp, - aom_prob *bestp, aom_prob upd, int stepsize, int n) { - int i, old_b, new_b, update_b, savings, bestsavings; - int newp; - const int step_sign = *bestp > oldp[PIVOT_NODE] ? 
-1 : 1; - const int step = stepsize * step_sign; - const int upd_cost = av1_cost_one(upd) - av1_cost_zero(upd); - aom_prob bestnewp, newplist[ENTROPY_NODES], oldplist[ENTROPY_NODES]; - av1_model_to_full_probs(oldp, oldplist); - memcpy(newplist, oldp, sizeof(aom_prob) * UNCONSTRAINED_NODES); - for (i = UNCONSTRAINED_NODES, old_b = 0; i < ENTROPY_NODES; ++i) - old_b += get_cost(ct[i], oldplist[i], n); - old_b += get_cost(ct[PIVOT_NODE], oldplist[PIVOT_NODE], n); - - bestsavings = 0; - bestnewp = oldp[PIVOT_NODE]; - - assert(stepsize > 0); - - for (newp = *bestp; (newp - oldp[PIVOT_NODE]) * step_sign < 0; newp += step) { - if (newp < 1 || newp > 255) continue; - newplist[PIVOT_NODE] = newp; - av1_model_to_full_probs(newplist, newplist); - for (i = UNCONSTRAINED_NODES, new_b = 0; i < ENTROPY_NODES; ++i) - new_b += get_cost(ct[i], newplist[i], n); - new_b += get_cost(ct[PIVOT_NODE], newplist[PIVOT_NODE], n); - update_b = prob_diff_update_cost(newp, oldp[PIVOT_NODE]) + upd_cost; - savings = old_b - new_b - update_b; - if (savings > bestsavings) { - bestsavings = savings; - bestnewp = newp; - } - } - - *bestp = bestnewp; - return bestsavings; -} -#endif // CONFIG_SUBFRAME_PROB_UPDATE - void av1_cond_prob_diff_update(aom_writer *w, aom_prob *oldp, const unsigned int ct[2], int probwt) { const aom_prob upd = DIFF_UPDATE_PROB; diff --git a/third_party/aom/av1/encoder/subexp.h b/third_party/aom/av1/encoder/subexp.h index 049265cb8..580edabdb 100644 --- a/third_party/aom/av1/encoder/subexp.h +++ b/third_party/aom/av1/encoder/subexp.h @@ -35,13 +35,6 @@ int av1_prob_diff_update_savings_search_model(const unsigned int *ct, int av1_cond_prob_diff_update_savings(aom_prob *oldp, const unsigned int ct[2], int probwt); -#if CONFIG_SUBFRAME_PROB_UPDATE -int av1_prob_update_search_subframe(unsigned int ct[][2], aom_prob oldp, - aom_prob *bestp, aom_prob upd, int n); -int av1_prob_update_search_model_subframe( - unsigned int ct[ENTROPY_NODES][COEF_PROBS_BUFS][2], const aom_prob *oldp, - aom_prob *bestp, aom_prob upd, int stepsize, int n); -#endif // CONFIG_SUBFRAME_PROB_UPDATE #ifdef __cplusplus } // extern "C" #endif diff --git a/third_party/aom/av1/encoder/temporal_filter.c b/third_party/aom/av1/encoder/temporal_filter.c index de962fe84..1ed1ebdb2 100644 --- a/third_party/aom/av1/encoder/temporal_filter.c +++ b/third_party/aom/av1/encoder/temporal_filter.c @@ -281,14 +281,10 @@ static int temporal_filter_find_matching_mb_c(AV1_COMP *cpi, av1_set_mv_search_range(&x->mv_limits, &best_ref_mv1); -#if CONFIG_REF_MV x->mvcost = x->mv_cost_stack[0]; x->nmvjointcost = x->nmv_vec_cost[0]; - x->mvsadcost = x->mvcost; - x->nmvjointsadcost = x->nmvjointcost; -#endif - // Ignore mv costing by sending NULL pointer instead of cost arrays + // Use mv costing from x->mvcost directly av1_hex_search(x, &best_ref_mv1_full, step_param, sadpb, 1, cond_cost_list(cpi, cost_list), &cpi->fn_ptr[BLOCK_16X16], 0, &best_ref_mv1); @@ -299,8 +295,11 @@ static int temporal_filter_find_matching_mb_c(AV1_COMP *cpi, bestsme = cpi->find_fractional_mv_step( x, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], 0, mv_sf->subpel_iters_per_step, - cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0, - 0); + cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, +#if CONFIG_EXT_INTER + NULL, 0, 0, +#endif + 0, 0, 0); x->e_mbd.mi[0]->bmi[0].as_mv[0] = x->best_mv; diff --git a/third_party/aom/av1/encoder/tokenize.c b/third_party/aom/av1/encoder/tokenize.c index 
f48493bf8..18d2cd958 100644 --- a/third_party/aom/av1/encoder/tokenize.c +++ b/third_party/aom/av1/encoder/tokenize.c @@ -23,6 +23,9 @@ #include "av1/encoder/cost.h" #include "av1/encoder/encoder.h" +#if CONFIG_LV_MAP +#include "av1/encoder/encodetxb.c" +#endif #include "av1/encoder/rdopt.h" #include "av1/encoder/tokenize.h" @@ -261,20 +264,6 @@ const av1_extra_bit av1_extra_bits[ENTROPY_TOKENS] = { }; #endif -#if !CONFIG_EC_MULTISYMBOL -const struct av1_token av1_coef_encodings[ENTROPY_TOKENS] = { - { 2, 2 }, { 6, 3 }, { 28, 5 }, { 58, 6 }, { 59, 6 }, { 60, 6 }, - { 61, 6 }, { 124, 7 }, { 125, 7 }, { 126, 7 }, { 127, 7 }, { 0, 1 } -}; -#endif // !CONFIG_EC_MULTISYMBOL - -struct tokenize_b_args { - const AV1_COMP *cpi; - ThreadData *td; - TOKENEXTRA **tp; - int this_rate; -}; - #if !CONFIG_PVQ || CONFIG_VAR_TX static void cost_coeffs_b(int plane, int block, int blk_row, int blk_col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { @@ -314,7 +303,6 @@ static void set_entropy_context_b(int plane, int block, int blk_row, blk_row); } -#if CONFIG_NEW_TOKENSET static INLINE void add_token(TOKENEXTRA **t, aom_cdf_prob (*tail_cdf)[CDF_SIZE(ENTROPY_TOKENS)], aom_cdf_prob (*head_cdf)[CDF_SIZE(ENTROPY_TOKENS)], @@ -328,25 +316,6 @@ static INLINE void add_token(TOKENEXTRA **t, (*t)->first_val = first_val; (*t)++; } - -#else // CONFIG_NEW_TOKENSET -static INLINE void add_token( - TOKENEXTRA **t, const aom_prob *context_tree, -#if CONFIG_EC_MULTISYMBOL - aom_cdf_prob (*token_cdf)[CDF_SIZE(ENTROPY_TOKENS)], -#endif // CONFIG_EC_MULTISYMBOL - int32_t extra, uint8_t token, uint8_t skip_eob_node, unsigned int *counts) { - (*t)->token = token; - (*t)->extra = extra; - (*t)->context_tree = context_tree; -#if CONFIG_EC_MULTISYMBOL - (*t)->token_cdf = token_cdf; -#endif // CONFIG_EC_MULTISYMBOL - (*t)->skip_eob_node = skip_eob_node; - (*t)++; - ++counts[token]; -} -#endif // CONFIG_NEW_TOKENSET #endif // !CONFIG_PVQ || CONFIG_VAR_TX #if CONFIG_PALETTE @@ -471,22 +440,11 @@ static void tokenize_b(int plane, int block, int blk_row, int blk_col, const int ref = is_inter_block(mbmi); unsigned int(*const counts)[COEFF_CONTEXTS][ENTROPY_TOKENS] = td->rd_counts.coef_counts[txsize_sqr_map[tx_size]][type][ref]; -#if !CONFIG_NEW_TOKENSET -#if CONFIG_SUBFRAME_PROB_UPDATE - const aom_prob(*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] = - cpi->subframe_stats.coef_probs_buf[cpi->common.coef_probs_update_idx] - [txsize_sqr_map[tx_size]][type][ref]; -#else - aom_prob(*const coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] = - cpi->common.fc->coef_probs[txsize_sqr_map[tx_size]][type][ref]; -#endif // CONFIG_SUBFRAME_PROB_UPDATE -#endif // !CONFIG_NEW_TOKENSET #if CONFIG_EC_ADAPT FRAME_CONTEXT *ec_ctx = xd->tile_ctx; -#elif CONFIG_EC_MULTISYMBOL +#else FRAME_CONTEXT *ec_ctx = cpi->common.fc; #endif -#if CONFIG_NEW_TOKENSET aom_cdf_prob( *const coef_head_cdfs)[COEFF_CONTEXTS][CDF_SIZE(ENTROPY_TOKENS)] = ec_ctx->coef_head_cdfs[txsize_sqr_map[tx_size]][type][ref]; @@ -497,13 +455,6 @@ static void tokenize_b(int plane, int block, int blk_row, int blk_col, td->counts->blockz_count[txsize_sqr_map[tx_size]][type][ref]; int eob_val; int first_val = 1; -#else -#if CONFIG_EC_MULTISYMBOL - aom_cdf_prob(*const coef_cdfs)[COEFF_CONTEXTS][CDF_SIZE(ENTROPY_TOKENS)] = - ec_ctx->coef_cdfs[txsize_sqr_map[tx_size]][type][ref]; -#endif - int skip_eob = 0; -#endif const int seg_eob = get_tx_eob(&cpi->common.seg, segment_id, tx_size); unsigned int(*const eob_branch)[COEFF_CONTEXTS] = 
td->counts->eob_branch[txsize_sqr_map[tx_size]][type][ref]; @@ -517,7 +468,6 @@ static void tokenize_b(int plane, int block, int blk_row, int blk_col, nb = scan_order->neighbors; c = 0; -#if CONFIG_NEW_TOKENSET if (eob == 0) add_token(&t, &coef_tail_cdfs[band[c]][pt], &coef_head_cdfs[band[c]][pt], 1, 1, 0, BLOCK_Z_TOKEN); @@ -553,33 +503,6 @@ static void tokenize_b(int plane, int block, int blk_row, int blk_col, ++c; pt = get_coef_context(nb, token_cache, AOMMIN(c, eob - 1)); } -#else - while (c < eob) { - const int v = qcoeff[scan[c]]; - eob_branch[band[c]][pt] += !skip_eob; - - av1_get_token_extra(v, &token, &extra); - - add_token(&t, coef_probs[band[c]][pt], -#if CONFIG_EC_MULTISYMBOL - &coef_cdfs[band[c]][pt], -#endif - extra, (uint8_t)token, (uint8_t)skip_eob, counts[band[c]][pt]); - - token_cache[scan[c]] = av1_pt_energy_class[token]; - ++c; - pt = get_coef_context(nb, token_cache, c); - skip_eob = (token == ZERO_TOKEN); - } - if (c < seg_eob) { - add_token(&t, coef_probs[band[c]][pt], -#if CONFIG_EC_MULTISYMBOL - NULL, -#endif - 0, EOB_TOKEN, 0, counts[band[c]][pt]); - ++eob_branch[band[c]][pt]; - } -#endif // CONFIG_NEW_TOKENSET #if CONFIG_COEF_INTERLEAVE t->token = EOSB_TOKEN; @@ -651,6 +574,18 @@ void tokenize_vartx(ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run, if (tx_size == plane_tx_size) { plane_bsize = get_plane_block_size(mbmi->sb_type, pd); +#if CONFIG_LV_MAP + if (!dry_run) { + av1_update_and_record_txb_context(plane, block, blk_row, blk_col, + plane_bsize, tx_size, arg); + } else if (dry_run == DRY_RUN_NORMAL) { + av1_update_txb_context_b(plane, block, blk_row, blk_col, plane_bsize, + tx_size, arg); + } else { + printf("DRY_RUN_COSTCOEFFS is not supported yet\n"); + assert(0); + } +#else if (!dry_run) tokenize_b(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg); else if (dry_run == DRY_RUN_NORMAL) @@ -658,6 +593,7 @@ void tokenize_vartx(ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run, tx_size, arg); else if (dry_run == DRY_RUN_COSTCOEFFS) cost_coeffs_b(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg); +#endif } else { // Half the block size in transform block unit. 
const TX_SIZE sub_txs = sub_tx_size_map[tx_size]; @@ -688,7 +624,11 @@ void av1_tokenize_sb_vartx(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t, MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; +#if CONFIG_LV_MAP + (void)t; +#else TOKENEXTRA *t_backup = *t; +#endif const int ctx = av1_get_skip_context(xd); const int skip_inc = !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP); @@ -698,22 +638,25 @@ void av1_tokenize_sb_vartx(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t, if (mbmi->skip) { if (!dry_run) td->counts->skip[ctx][1] += skip_inc; - reset_skip_context(xd, bsize); + av1_reset_skip_context(xd, mi_row, mi_col, bsize); +#if !CONFIG_LV_MAP if (dry_run) *t = t_backup; +#endif return; } - if (!dry_run) - td->counts->skip[ctx][0] += skip_inc; + if (!dry_run) td->counts->skip[ctx][0] += skip_inc; +#if !CONFIG_LV_MAP else *t = t_backup; +#endif for (plane = 0; plane < MAX_MB_PLANE; ++plane) { #if CONFIG_CB4X4 if (!is_chroma_reference(mi_row, mi_col, bsize, xd->plane[plane].subsampling_x, xd->plane[plane].subsampling_y)) { -#if !CONFIG_PVQ +#if !CONFIG_PVQ || !CONFIG_LV_MAP if (!dry_run) { (*t)->token = EOSB_TOKEN; (*t)++; @@ -746,10 +689,12 @@ void av1_tokenize_sb_vartx(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t, } } +#if !CONFIG_LV_MAP if (!dry_run) { (*t)->token = EOSB_TOKEN; (*t)++; } +#endif } if (rate) *rate += arg.this_rate; } @@ -768,7 +713,7 @@ void av1_tokenize_sb(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t, struct tokenize_b_args arg = { cpi, td, t, 0 }; if (mbmi->skip) { if (!dry_run) td->counts->skip[ctx][1] += skip_inc; - reset_skip_context(xd, bsize); + av1_reset_skip_context(xd, mi_row, mi_col, bsize); return; } @@ -843,8 +788,8 @@ void av1_tokenize_sb(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t, #if CONFIG_SUPERTX void av1_tokenize_sb_supertx(const AV1_COMP *cpi, ThreadData *td, - TOKENEXTRA **t, RUN_TYPE dry_run, BLOCK_SIZE bsize, - int *rate) { + TOKENEXTRA **t, RUN_TYPE dry_run, int mi_row, + int mi_col, BLOCK_SIZE bsize, int *rate) { const AV1_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &td->mb.e_mbd; MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi; @@ -855,7 +800,7 @@ void av1_tokenize_sb_supertx(const AV1_COMP *cpi, ThreadData *td, struct tokenize_b_args arg = { cpi, td, t, 0 }; if (mbmi->skip) { if (!dry_run) td->counts->skip[ctx][1] += skip_inc; - reset_skip_context(xd, bsize); + av1_reset_skip_context(xd, mi_row, mi_col, bsize); if (dry_run) *t = t_backup; return; } diff --git a/third_party/aom/av1/encoder/tokenize.h b/third_party/aom/av1/encoder/tokenize.h index 3928111d6..cbfa3cd91 100644 --- a/third_party/aom/av1/encoder/tokenize.h +++ b/third_party/aom/av1/encoder/tokenize.h @@ -35,14 +35,10 @@ typedef struct { } TOKENVALUE; typedef struct { -#if CONFIG_NEW_TOKENSET aom_cdf_prob (*tail_cdf)[CDF_SIZE(ENTROPY_TOKENS)]; aom_cdf_prob (*head_cdf)[CDF_SIZE(ENTROPY_TOKENS)]; int eob_val; int first_val; -#elif CONFIG_EC_MULTISYMBOL - aom_cdf_prob (*token_cdf)[CDF_SIZE(ENTROPY_TOKENS)]; -#endif const aom_prob *context_tree; EXTRABIT extra; uint8_t token; @@ -51,15 +47,19 @@ typedef struct { extern const aom_tree_index av1_coef_tree[]; extern const aom_tree_index av1_coef_con_tree[]; -#if !CONFIG_EC_MULTISYMBOL -extern const struct av1_token av1_coef_encodings[]; -#endif // !CONFIG_EC_MULTISYMBOL int av1_is_skippable_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane); struct AV1_COMP; struct ThreadData; +struct tokenize_b_args { + const struct AV1_COMP 
*cpi; + struct ThreadData *td; + TOKENEXTRA **tp; + int this_rate; +}; + typedef enum { OUTPUT_ENABLED = 0, DRY_RUN_NORMAL, @@ -85,8 +85,8 @@ void av1_tokenize_sb(const struct AV1_COMP *cpi, struct ThreadData *td, int *rate, const int mi_row, const int mi_col); #if CONFIG_SUPERTX void av1_tokenize_sb_supertx(const struct AV1_COMP *cpi, struct ThreadData *td, - TOKENEXTRA **t, RUN_TYPE dry_run, BLOCK_SIZE bsize, - int *rate); + TOKENEXTRA **t, RUN_TYPE dry_run, int mi_row, + int mi_col, BLOCK_SIZE bsize, int *rate); #endif extern const int16_t *av1_dct_value_cost_ptr; diff --git a/third_party/aom/av1/encoder/variance_tree.c b/third_party/aom/av1/encoder/variance_tree.c deleted file mode 100644 index 9384cd78e..000000000 --- a/third_party/aom/av1/encoder/variance_tree.c +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include "av1/encoder/variance_tree.h" -#include "av1/encoder/encoder.h" - -void av1_setup_var_tree(struct AV1Common *cm, ThreadData *td) { - int i, j; -#if CONFIG_EXT_PARTITION - const int leaf_nodes = 1024; - const int tree_nodes = 1024 + 256 + 64 + 16 + 4 + 1; -#else - const int leaf_nodes = 256; - const int tree_nodes = 256 + 64 + 16 + 4 + 1; -#endif // CONFIG_EXT_PARTITION - int index = 0; - VAR_TREE *this_var; - int nodes; - - aom_free(td->var_tree); - CHECK_MEM_ERROR(cm, td->var_tree, - aom_calloc(tree_nodes, sizeof(*td->var_tree))); - - this_var = &td->var_tree[0]; - - // Sets up all the leaf nodes in the tree. - for (index = 0; index < leaf_nodes; ++index) { - VAR_TREE *const leaf = &td->var_tree[index]; - leaf->split[0] = NULL; - } - - // Each node has 4 leaf nodes, fill in the child pointers - // from leafs to the root. - for (nodes = leaf_nodes >> 2; nodes > 0; nodes >>= 2) { - for (i = 0; i < nodes; ++i, ++index) { - VAR_TREE *const node = &td->var_tree[index]; - for (j = 0; j < 4; j++) node->split[j] = this_var++; - } - } - - // Set up the root node for the largest superblock size - i = MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2; - td->var_root[i] = &td->var_tree[tree_nodes - 1]; - // Set up the root nodes for the rest of the possible superblock sizes - while (--i >= 0) { - td->var_root[i] = td->var_root[i + 1]->split[0]; - } -} - -void av1_free_var_tree(ThreadData *td) { - aom_free(td->var_tree); - td->var_tree = NULL; -} diff --git a/third_party/aom/av1/encoder/variance_tree.h b/third_party/aom/av1/encoder/variance_tree.h deleted file mode 100644 index a9f27302e..000000000 --- a/third_party/aom/av1/encoder/variance_tree.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AV1_ENCODER_VARIANCE_TREE_H_ -#define AV1_ENCODER_VARIANCE_TREE_H_ - -#include <assert.h> - -#include "./aom_config.h" - -#include "aom/aom_integer.h" - -#include "av1/common/enums.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct AV1Common; -struct ThreadData; - -typedef struct { - int64_t sum_square_error; - int64_t sum_error; - int log2_count; - int variance; -} VAR; - -typedef struct { - VAR none; - VAR horz[2]; - VAR vert[2]; -} partition_variance; - -typedef struct VAR_TREE { - int force_split; - partition_variance variances; - struct VAR_TREE *split[4]; - BLOCK_SIZE bsize; - const uint8_t *src; - const uint8_t *ref; - int src_stride; - int ref_stride; - int width; - int height; -#if CONFIG_HIGHBITDEPTH - int highbd; -#endif // CONFIG_HIGHBITDEPTH -} VAR_TREE; - -void av1_setup_var_tree(struct AV1Common *cm, struct ThreadData *td); -void av1_free_var_tree(struct ThreadData *td); - -// Set variance values given sum square error, sum error, count. -static INLINE void fill_variance(int64_t s2, int64_t s, int c, VAR *v) { - v->sum_square_error = s2; - v->sum_error = s; - v->log2_count = c; - v->variance = - (int)(256 * (v->sum_square_error - - ((v->sum_error * v->sum_error) >> v->log2_count)) >> - v->log2_count); -} - -static INLINE void sum_2_variances(const VAR *a, const VAR *b, VAR *r) { - assert(a->log2_count == b->log2_count); - fill_variance(a->sum_square_error + b->sum_square_error, - a->sum_error + b->sum_error, a->log2_count + 1, r); -} - -static INLINE void fill_variance_node(VAR_TREE *vt) { - sum_2_variances(&vt->split[0]->variances.none, &vt->split[1]->variances.none, - &vt->variances.horz[0]); - sum_2_variances(&vt->split[2]->variances.none, &vt->split[3]->variances.none, - &vt->variances.horz[1]); - sum_2_variances(&vt->split[0]->variances.none, &vt->split[2]->variances.none, - &vt->variances.vert[0]); - sum_2_variances(&vt->split[1]->variances.none, &vt->split[3]->variances.none, - &vt->variances.vert[1]); - sum_2_variances(&vt->variances.vert[0], &vt->variances.vert[1], - &vt->variances.none); -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif /* AV1_ENCODER_VARIANCE_TREE_H_ */ diff --git a/third_party/aom/av1/encoder/x86/av1_quantize_sse2.c b/third_party/aom/av1/encoder/x86/av1_quantize_sse2.c index f9c95b6cb..190317389 100644 --- a/third_party/aom/av1/encoder/x86/av1_quantize_sse2.c +++ b/third_party/aom/av1/encoder/x86/av1_quantize_sse2.c @@ -15,13 +15,65 @@ #include "./av1_rtcd.h" #include "aom/aom_integer.h" -void av1_quantize_fp_sse2(const int16_t *coeff_ptr, intptr_t n_coeffs, +static INLINE void read_coeff(const tran_low_t *coeff, intptr_t offset, + __m128i *c0, __m128i *c1) { + const tran_low_t *addr = coeff + offset; +#if CONFIG_HIGHBITDEPTH + const __m128i x0 = _mm_load_si128((const __m128i *)addr); + const __m128i x1 = _mm_load_si128((const __m128i *)addr + 1); + const __m128i x2 = _mm_load_si128((const __m128i *)addr + 2); + const __m128i x3 = _mm_load_si128((const __m128i *)addr + 3); + *c0 = _mm_packs_epi32(x0, x1); + *c1 = _mm_packs_epi32(x2, x3); +#else + *c0 = _mm_load_si128((const __m128i *)addr); + *c1 = _mm_load_si128((const __m128i *)addr + 1); +#endif +} + +static INLINE void write_qcoeff(const __m128i *qc0, const __m128i *qc1, + tran_low_t *qcoeff, intptr_t offset) { + tran_low_t *addr = qcoeff + offset; +#if CONFIG_HIGHBITDEPTH + const __m128i 
zero = _mm_setzero_si128(); + __m128i sign_bits = _mm_cmplt_epi16(*qc0, zero); + __m128i y0 = _mm_unpacklo_epi16(*qc0, sign_bits); + __m128i y1 = _mm_unpackhi_epi16(*qc0, sign_bits); + _mm_store_si128((__m128i *)addr, y0); + _mm_store_si128((__m128i *)addr + 1, y1); + + sign_bits = _mm_cmplt_epi16(*qc1, zero); + y0 = _mm_unpacklo_epi16(*qc1, sign_bits); + y1 = _mm_unpackhi_epi16(*qc1, sign_bits); + _mm_store_si128((__m128i *)addr + 2, y0); + _mm_store_si128((__m128i *)addr + 3, y1); +#else + _mm_store_si128((__m128i *)addr, *qc0); + _mm_store_si128((__m128i *)addr + 1, *qc1); +#endif +} + +static INLINE void write_zero(tran_low_t *qcoeff, intptr_t offset) { + const __m128i zero = _mm_setzero_si128(); + tran_low_t *addr = qcoeff + offset; +#if CONFIG_HIGHBITDEPTH + _mm_store_si128((__m128i *)addr, zero); + _mm_store_si128((__m128i *)addr + 1, zero); + _mm_store_si128((__m128i *)addr + 2, zero); + _mm_store_si128((__m128i *)addr + 3, zero); +#else + _mm_store_si128((__m128i *)addr, zero); + _mm_store_si128((__m128i *)addr + 1, zero); +#endif +} + +void av1_quantize_fp_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, - const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, - int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, - uint16_t *eob_ptr, const int16_t *scan_ptr, - const int16_t *iscan_ptr) { + const int16_t *quant_shift_ptr, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + const int16_t *dequant_ptr, uint16_t *eob_ptr, + const int16_t *scan_ptr, const int16_t *iscan_ptr) { __m128i zero; __m128i thr; int16_t nzflag; @@ -54,8 +106,7 @@ void av1_quantize_fp_sse2(const int16_t *coeff_ptr, intptr_t n_coeffs, __m128i qcoeff0, qcoeff1; __m128i qtmp0, qtmp1; // Do DC and first 15 AC - coeff0 = _mm_load_si128((const __m128i *)(coeff_ptr + n_coeffs)); - coeff1 = _mm_load_si128((const __m128i *)(coeff_ptr + n_coeffs) + 1); + read_coeff(coeff_ptr, n_coeffs, &coeff0, &coeff1); // Poor man's sign extract coeff0_sign = _mm_srai_epi16(coeff0, 15); @@ -78,15 +129,13 @@ void av1_quantize_fp_sse2(const int16_t *coeff_ptr, intptr_t n_coeffs, qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); - _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0); - _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1); + write_qcoeff(&qcoeff0, &qcoeff1, qcoeff_ptr, n_coeffs); coeff0 = _mm_mullo_epi16(qcoeff0, dequant); dequant = _mm_unpackhi_epi64(dequant, dequant); coeff1 = _mm_mullo_epi16(qcoeff1, dequant); - _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0); - _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1); + write_qcoeff(&coeff0, &coeff1, dqcoeff_ptr, n_coeffs); } { @@ -121,8 +170,7 @@ void av1_quantize_fp_sse2(const int16_t *coeff_ptr, intptr_t n_coeffs, __m128i qcoeff0, qcoeff1; __m128i qtmp0, qtmp1; - coeff0 = _mm_load_si128((const __m128i *)(coeff_ptr + n_coeffs)); - coeff1 = _mm_load_si128((const __m128i *)(coeff_ptr + n_coeffs) + 1); + read_coeff(coeff_ptr, n_coeffs, &coeff0, &coeff1); // Poor man's sign extract coeff0_sign = _mm_srai_epi16(coeff0, 15); @@ -147,20 +195,15 @@ void av1_quantize_fp_sse2(const int16_t *coeff_ptr, intptr_t n_coeffs, qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); - _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0); - _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1); + write_qcoeff(&qcoeff0, &qcoeff1, qcoeff_ptr, n_coeffs); 
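[Editor's note] The new read_coeff()/write_qcoeff()/write_zero() helpers above exist so the 16-bit SSE2 kernel keeps working when CONFIG_HIGHBITDEPTH makes tran_low_t 32 bits wide: coefficients are saturated down to 16-bit lanes on load and sign-extended back on store. A minimal scalar sketch of that load/store contract follows; the helper names and the int32_t-tran_low_t assumption are illustrative, not part of the patch.

#include <stdint.h>

/* Load: saturate 32-bit coefficients down to the 16-bit lanes the kernel
   operates on (this mirrors the _mm_packs_epi32 pairs in read_coeff). */
static void read_coeff_scalar(const int32_t *coeff, int16_t *lanes, int n) {
  for (int i = 0; i < n; ++i) {
    int32_t v = coeff[i];
    if (v > INT16_MAX) v = INT16_MAX;
    if (v < INT16_MIN) v = INT16_MIN;
    lanes[i] = (int16_t)v;
  }
}

/* Store: sign-extend the 16-bit results back to 32-bit storage (the
   _mm_cmplt_epi16 / unpack pairs in write_qcoeff build the high halves). */
static void write_qcoeff_scalar(const int16_t *lanes, int32_t *qcoeff, int n) {
  for (int i = 0; i < n; ++i) qcoeff[i] = lanes[i];
}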
coeff0 = _mm_mullo_epi16(qcoeff0, dequant); coeff1 = _mm_mullo_epi16(qcoeff1, dequant); - _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0); - _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1); + write_qcoeff(&coeff0, &coeff1, dqcoeff_ptr, n_coeffs); } else { - _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero); - _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero); - - _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero); - _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero); + write_zero(qcoeff_ptr, n_coeffs); + write_zero(dqcoeff_ptr, n_coeffs); } } @@ -200,10 +243,8 @@ void av1_quantize_fp_sse2(const int16_t *coeff_ptr, intptr_t n_coeffs, } } else { do { - _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero); - _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero); - _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero); - _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero); + write_zero(dqcoeff_ptr, n_coeffs); + write_zero(qcoeff_ptr, n_coeffs); n_coeffs += 8 * 2; } while (n_coeffs < 0); *eob_ptr = 0; diff --git a/third_party/aom/av1/encoder/x86/corner_match_sse4.c b/third_party/aom/av1/encoder/x86/corner_match_sse4.c new file mode 100644 index 000000000..179da0d28 --- /dev/null +++ b/third_party/aom/av1/encoder/x86/corner_match_sse4.c @@ -0,0 +1,91 @@ +#include <stdlib.h> +#include <memory.h> +#include <math.h> +#include <assert.h> + +#include <smmintrin.h> + +#include "./av1_rtcd.h" +#include "aom_ports/mem.h" +#include "av1/encoder/corner_match.h" + +DECLARE_ALIGNED(16, static const uint8_t, byte_mask[16]) = { + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0 +}; +#if MATCH_SZ != 13 +#error "Need to change byte_mask in corner_match_sse4.c if MATCH_SZ != 13" +#endif + +/* Compute corr(im1, im2) * MATCH_SZ * stddev(im1), where the + correlation/standard deviation are taken over MATCH_SZ by MATCH_SZ windows + of each image, centered at (x1, y1) and (x2, y2) respectively. 
+*/ +double compute_cross_correlation_sse4_1(unsigned char *im1, int stride1, int x1, + int y1, unsigned char *im2, int stride2, + int x2, int y2) { + int i; + // 2 16-bit partial sums in lanes 0, 4 (== 2 32-bit partial sums in lanes 0, + // 2) + __m128i sum1_vec = _mm_setzero_si128(); + __m128i sum2_vec = _mm_setzero_si128(); + // 4 32-bit partial sums of squares + __m128i sumsq2_vec = _mm_setzero_si128(); + __m128i cross_vec = _mm_setzero_si128(); + + const __m128i mask = _mm_load_si128((__m128i *)byte_mask); + const __m128i zero = _mm_setzero_si128(); + + im1 += (y1 - MATCH_SZ_BY2) * stride1 + (x1 - MATCH_SZ_BY2); + im2 += (y2 - MATCH_SZ_BY2) * stride2 + (x2 - MATCH_SZ_BY2); + + for (i = 0; i < MATCH_SZ; ++i) { + const __m128i v1 = + _mm_and_si128(_mm_loadu_si128((__m128i *)&im1[i * stride1]), mask); + const __m128i v2 = + _mm_and_si128(_mm_loadu_si128((__m128i *)&im2[i * stride2]), mask); + + // Using the 'sad' intrinsic here is a bit faster than adding + // v1_l + v1_r and v2_l + v2_r, plus it avoids the need for a 16->32 bit + // conversion step later, for a net speedup of ~10% + sum1_vec = _mm_add_epi16(sum1_vec, _mm_sad_epu8(v1, zero)); + sum2_vec = _mm_add_epi16(sum2_vec, _mm_sad_epu8(v2, zero)); + + const __m128i v1_l = _mm_cvtepu8_epi16(v1); + const __m128i v1_r = _mm_cvtepu8_epi16(_mm_srli_si128(v1, 8)); + const __m128i v2_l = _mm_cvtepu8_epi16(v2); + const __m128i v2_r = _mm_cvtepu8_epi16(_mm_srli_si128(v2, 8)); + + sumsq2_vec = _mm_add_epi32( + sumsq2_vec, + _mm_add_epi32(_mm_madd_epi16(v2_l, v2_l), _mm_madd_epi16(v2_r, v2_r))); + cross_vec = _mm_add_epi32( + cross_vec, + _mm_add_epi32(_mm_madd_epi16(v1_l, v2_l), _mm_madd_epi16(v1_r, v2_r))); + } + + // Now we can treat the four registers (sum1_vec, sum2_vec, sumsq2_vec, + // cross_vec) + // as holding 4 32-bit elements each, which we want to sum horizontally. + // We do this by transposing and then summing vertically. 
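[Editor's note] For reference, a plain scalar version of the window sums this loop accumulates and of the reduction that follows it. This is an illustrative sketch only: it assumes the MATCH_SZ / MATCH_SZ_BY2 / MATCH_SZ_SQ macros from av1/encoder/corner_match.h (already included by this file), and the function name is made up.

#include <math.h>

static double cross_correlation_scalar(const unsigned char *im1, int stride1,
                                       int x1, int y1,
                                       const unsigned char *im2, int stride2,
                                       int x2, int y2) {
  int sum1 = 0, sum2 = 0, sumsq2 = 0, cross = 0;
  for (int i = 0; i < MATCH_SZ; ++i) {
    for (int j = 0; j < MATCH_SZ; ++j) {
      const int v1 =
          im1[(y1 - MATCH_SZ_BY2 + i) * stride1 + (x1 - MATCH_SZ_BY2 + j)];
      const int v2 =
          im2[(y2 - MATCH_SZ_BY2 + i) * stride2 + (x2 - MATCH_SZ_BY2 + j)];
      sum1 += v1;        /* window sum of im1   (sum1_vec)   */
      sum2 += v2;        /* window sum of im2   (sum2_vec)   */
      sumsq2 += v2 * v2; /* sum of im2 squares  (sumsq2_vec) */
      cross += v1 * v2;  /* cross term          (cross_vec)  */
    }
  }
  /* Same final reduction as the SIMD path below. */
  const int var2 = sumsq2 * MATCH_SZ_SQ - sum2 * sum2;
  const int cov = cross * MATCH_SZ_SQ - sum1 * sum2;
  return cov / sqrt((double)var2);
}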
+ __m128i tmp_0 = _mm_unpacklo_epi32(sum1_vec, sum2_vec); + __m128i tmp_1 = _mm_unpackhi_epi32(sum1_vec, sum2_vec); + __m128i tmp_2 = _mm_unpacklo_epi32(sumsq2_vec, cross_vec); + __m128i tmp_3 = _mm_unpackhi_epi32(sumsq2_vec, cross_vec); + + __m128i tmp_4 = _mm_unpacklo_epi64(tmp_0, tmp_2); + __m128i tmp_5 = _mm_unpackhi_epi64(tmp_0, tmp_2); + __m128i tmp_6 = _mm_unpacklo_epi64(tmp_1, tmp_3); + __m128i tmp_7 = _mm_unpackhi_epi64(tmp_1, tmp_3); + + __m128i res = + _mm_add_epi32(_mm_add_epi32(tmp_4, tmp_5), _mm_add_epi32(tmp_6, tmp_7)); + + int sum1 = _mm_extract_epi32(res, 0); + int sum2 = _mm_extract_epi32(res, 1); + int sumsq2 = _mm_extract_epi32(res, 2); + int cross = _mm_extract_epi32(res, 3); + + int var2 = sumsq2 * MATCH_SZ_SQ - sum2 * sum2; + int cov = cross * MATCH_SZ_SQ - sum1 * sum2; + return cov / sqrt((double)var2); +} diff --git a/third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c b/third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c index f201a29aa..b56eed518 100644 --- a/third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c +++ b/third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c @@ -13,7 +13,7 @@ #include "./av1_rtcd.h" #include "./aom_config.h" -#include "av1/common/av1_fwd_txfm2d_cfg.h" +#include "av1/common/av1_fwd_txfm1d_cfg.h" #include "av1/common/av1_txfm.h" #include "av1/common/x86/highbd_txfm_utility_sse4.h" #include "aom_dsp/txfm_common.h" @@ -58,7 +58,7 @@ static INLINE void load_buffer_4x4(const int16_t *input, __m128i *in, // shift[1] is used in txfm_func_col() // shift[2] is used in txfm_func_row() static void fdct4x4_sse4_1(__m128i *in, int bit) { - const int32_t *cospi = cospi_arr[bit - cos_bit_min]; + const int32_t *cospi = cospi_arr(bit); const __m128i cospi32 = _mm_set1_epi32(cospi[32]); const __m128i cospi48 = _mm_set1_epi32(cospi[48]); const __m128i cospi16 = _mm_set1_epi32(cospi[16]); @@ -133,7 +133,7 @@ void av1_highbd_fht4x4_sse4_1(const int16_t *input, tran_low_t *output, } static void fadst4x4_sse4_1(__m128i *in, int bit) { - const int32_t *cospi = cospi_arr[bit - cos_bit_min]; + const int32_t *cospi = cospi_arr(bit); const __m128i cospi8 = _mm_set1_epi32(cospi[8]); const __m128i cospi56 = _mm_set1_epi32(cospi[56]); const __m128i cospi40 = _mm_set1_epi32(cospi[40]); @@ -209,71 +209,81 @@ static void fadst4x4_sse4_1(__m128i *in, int bit) { void av1_fwd_txfm2d_4x4_sse4_1(const int16_t *input, int32_t *coeff, int input_stride, int tx_type, int bd) { __m128i in[4]; - const TXFM_2D_CFG *cfg = NULL; + const TXFM_1D_CFG *row_cfg = NULL; + const TXFM_1D_CFG *col_cfg = NULL; switch (tx_type) { case DCT_DCT: - cfg = &fwd_txfm_2d_cfg_dct_dct_4; - load_buffer_4x4(input, in, input_stride, 0, 0, cfg->shift[0]); - fdct4x4_sse4_1(in, cfg->cos_bit_col[2]); - fdct4x4_sse4_1(in, cfg->cos_bit_row[2]); + row_cfg = &fwd_txfm_1d_row_cfg_dct_4; + col_cfg = &fwd_txfm_1d_col_cfg_dct_4; + load_buffer_4x4(input, in, input_stride, 0, 0, row_cfg->shift[0]); + fdct4x4_sse4_1(in, col_cfg->cos_bit[2]); + fdct4x4_sse4_1(in, row_cfg->cos_bit[2]); write_buffer_4x4(in, coeff); break; case ADST_DCT: - cfg = &fwd_txfm_2d_cfg_adst_dct_4; - load_buffer_4x4(input, in, input_stride, 0, 0, cfg->shift[0]); - fadst4x4_sse4_1(in, cfg->cos_bit_col[2]); - fdct4x4_sse4_1(in, cfg->cos_bit_row[2]); + row_cfg = &fwd_txfm_1d_row_cfg_dct_4; + col_cfg = &fwd_txfm_1d_col_cfg_adst_4; + load_buffer_4x4(input, in, input_stride, 0, 0, row_cfg->shift[0]); + fadst4x4_sse4_1(in, col_cfg->cos_bit[2]); + fdct4x4_sse4_1(in, row_cfg->cos_bit[2]); write_buffer_4x4(in, coeff); break; case DCT_ADST: - cfg = 
&fwd_txfm_2d_cfg_dct_adst_4; - load_buffer_4x4(input, in, input_stride, 0, 0, cfg->shift[0]); - fdct4x4_sse4_1(in, cfg->cos_bit_col[2]); - fadst4x4_sse4_1(in, cfg->cos_bit_row[2]); + row_cfg = &fwd_txfm_1d_row_cfg_adst_4; + col_cfg = &fwd_txfm_1d_col_cfg_dct_4; + load_buffer_4x4(input, in, input_stride, 0, 0, row_cfg->shift[0]); + fdct4x4_sse4_1(in, col_cfg->cos_bit[2]); + fadst4x4_sse4_1(in, row_cfg->cos_bit[2]); write_buffer_4x4(in, coeff); break; case ADST_ADST: - cfg = &fwd_txfm_2d_cfg_adst_adst_4; - load_buffer_4x4(input, in, input_stride, 0, 0, cfg->shift[0]); - fadst4x4_sse4_1(in, cfg->cos_bit_col[2]); - fadst4x4_sse4_1(in, cfg->cos_bit_row[2]); + row_cfg = &fwd_txfm_1d_row_cfg_adst_4; + col_cfg = &fwd_txfm_1d_col_cfg_adst_4; + load_buffer_4x4(input, in, input_stride, 0, 0, row_cfg->shift[0]); + fadst4x4_sse4_1(in, col_cfg->cos_bit[2]); + fadst4x4_sse4_1(in, row_cfg->cos_bit[2]); write_buffer_4x4(in, coeff); break; #if CONFIG_EXT_TX case FLIPADST_DCT: - cfg = &fwd_txfm_2d_cfg_adst_dct_4; - load_buffer_4x4(input, in, input_stride, 1, 0, cfg->shift[0]); - fadst4x4_sse4_1(in, cfg->cos_bit_col[2]); - fdct4x4_sse4_1(in, cfg->cos_bit_row[2]); + row_cfg = &fwd_txfm_1d_row_cfg_dct_4; + col_cfg = &fwd_txfm_1d_col_cfg_adst_4; + load_buffer_4x4(input, in, input_stride, 1, 0, row_cfg->shift[0]); + fadst4x4_sse4_1(in, col_cfg->cos_bit[2]); + fdct4x4_sse4_1(in, row_cfg->cos_bit[2]); write_buffer_4x4(in, coeff); break; case DCT_FLIPADST: - cfg = &fwd_txfm_2d_cfg_dct_adst_4; - load_buffer_4x4(input, in, input_stride, 0, 1, cfg->shift[0]); - fdct4x4_sse4_1(in, cfg->cos_bit_col[2]); - fadst4x4_sse4_1(in, cfg->cos_bit_row[2]); + row_cfg = &fwd_txfm_1d_row_cfg_adst_4; + col_cfg = &fwd_txfm_1d_col_cfg_dct_4; + load_buffer_4x4(input, in, input_stride, 0, 1, row_cfg->shift[0]); + fdct4x4_sse4_1(in, col_cfg->cos_bit[2]); + fadst4x4_sse4_1(in, row_cfg->cos_bit[2]); write_buffer_4x4(in, coeff); break; case FLIPADST_FLIPADST: - cfg = &fwd_txfm_2d_cfg_adst_adst_4; - load_buffer_4x4(input, in, input_stride, 1, 1, cfg->shift[0]); - fadst4x4_sse4_1(in, cfg->cos_bit_col[2]); - fadst4x4_sse4_1(in, cfg->cos_bit_row[2]); + row_cfg = &fwd_txfm_1d_row_cfg_adst_4; + col_cfg = &fwd_txfm_1d_col_cfg_adst_4; + load_buffer_4x4(input, in, input_stride, 1, 1, row_cfg->shift[0]); + fadst4x4_sse4_1(in, col_cfg->cos_bit[2]); + fadst4x4_sse4_1(in, row_cfg->cos_bit[2]); write_buffer_4x4(in, coeff); break; case ADST_FLIPADST: - cfg = &fwd_txfm_2d_cfg_adst_adst_4; - load_buffer_4x4(input, in, input_stride, 0, 1, cfg->shift[0]); - fadst4x4_sse4_1(in, cfg->cos_bit_col[2]); - fadst4x4_sse4_1(in, cfg->cos_bit_row[2]); + row_cfg = &fwd_txfm_1d_row_cfg_adst_4; + col_cfg = &fwd_txfm_1d_col_cfg_adst_4; + load_buffer_4x4(input, in, input_stride, 0, 1, row_cfg->shift[0]); + fadst4x4_sse4_1(in, col_cfg->cos_bit[2]); + fadst4x4_sse4_1(in, row_cfg->cos_bit[2]); write_buffer_4x4(in, coeff); break; case FLIPADST_ADST: - cfg = &fwd_txfm_2d_cfg_adst_adst_4; - load_buffer_4x4(input, in, input_stride, 1, 0, cfg->shift[0]); - fadst4x4_sse4_1(in, cfg->cos_bit_col[2]); - fadst4x4_sse4_1(in, cfg->cos_bit_row[2]); + row_cfg = &fwd_txfm_1d_row_cfg_adst_4; + col_cfg = &fwd_txfm_1d_col_cfg_adst_4; + load_buffer_4x4(input, in, input_stride, 1, 0, row_cfg->shift[0]); + fadst4x4_sse4_1(in, col_cfg->cos_bit[2]); + fadst4x4_sse4_1(in, row_cfg->cos_bit[2]); write_buffer_4x4(in, coeff); break; #endif @@ -429,7 +439,7 @@ static INLINE void write_buffer_8x8(const __m128i *res, tran_low_t *output) { } static void fdct8x8_sse4_1(__m128i *in, __m128i *out, int bit) { - 
const int32_t *cospi = cospi_arr[bit - cos_bit_min]; + const int32_t *cospi = cospi_arr(bit); const __m128i cospi32 = _mm_set1_epi32(cospi[32]); const __m128i cospim32 = _mm_set1_epi32(-cospi[32]); const __m128i cospi48 = _mm_set1_epi32(cospi[48]); @@ -625,7 +635,7 @@ static void fdct8x8_sse4_1(__m128i *in, __m128i *out, int bit) { } static void fadst8x8_sse4_1(__m128i *in, __m128i *out, int bit) { - const int32_t *cospi = cospi_arr[bit - cos_bit_min]; + const int32_t *cospi = cospi_arr(bit); const __m128i cospi4 = _mm_set1_epi32(cospi[4]); const __m128i cospi60 = _mm_set1_epi32(cospi[60]); const __m128i cospi20 = _mm_set1_epi32(cospi[20]); @@ -930,97 +940,107 @@ static void fadst8x8_sse4_1(__m128i *in, __m128i *out, int bit) { void av1_fwd_txfm2d_8x8_sse4_1(const int16_t *input, int32_t *coeff, int stride, int tx_type, int bd) { __m128i in[16], out[16]; - const TXFM_2D_CFG *cfg = NULL; + const TXFM_1D_CFG *row_cfg = NULL; + const TXFM_1D_CFG *col_cfg = NULL; switch (tx_type) { case DCT_DCT: - cfg = &fwd_txfm_2d_cfg_dct_dct_8; - load_buffer_8x8(input, in, stride, 0, 0, cfg->shift[0]); - fdct8x8_sse4_1(in, out, cfg->cos_bit_col[2]); - col_txfm_8x8_rounding(out, -cfg->shift[1]); + row_cfg = &fwd_txfm_1d_row_cfg_dct_8; + col_cfg = &fwd_txfm_1d_col_cfg_dct_8; + load_buffer_8x8(input, in, stride, 0, 0, row_cfg->shift[0]); + fdct8x8_sse4_1(in, out, col_cfg->cos_bit[2]); + col_txfm_8x8_rounding(out, -row_cfg->shift[1]); transpose_8x8(out, in); - fdct8x8_sse4_1(in, out, cfg->cos_bit_row[2]); + fdct8x8_sse4_1(in, out, row_cfg->cos_bit[2]); transpose_8x8(out, in); write_buffer_8x8(in, coeff); break; case ADST_DCT: - cfg = &fwd_txfm_2d_cfg_adst_dct_8; - load_buffer_8x8(input, in, stride, 0, 0, cfg->shift[0]); - fadst8x8_sse4_1(in, out, cfg->cos_bit_col[2]); - col_txfm_8x8_rounding(out, -cfg->shift[1]); + row_cfg = &fwd_txfm_1d_row_cfg_dct_8; + col_cfg = &fwd_txfm_1d_col_cfg_adst_8; + load_buffer_8x8(input, in, stride, 0, 0, row_cfg->shift[0]); + fadst8x8_sse4_1(in, out, col_cfg->cos_bit[2]); + col_txfm_8x8_rounding(out, -row_cfg->shift[1]); transpose_8x8(out, in); - fdct8x8_sse4_1(in, out, cfg->cos_bit_row[2]); + fdct8x8_sse4_1(in, out, row_cfg->cos_bit[2]); transpose_8x8(out, in); write_buffer_8x8(in, coeff); break; case DCT_ADST: - cfg = &fwd_txfm_2d_cfg_dct_adst_8; - load_buffer_8x8(input, in, stride, 0, 0, cfg->shift[0]); - fdct8x8_sse4_1(in, out, cfg->cos_bit_col[2]); - col_txfm_8x8_rounding(out, -cfg->shift[1]); + row_cfg = &fwd_txfm_1d_row_cfg_adst_8; + col_cfg = &fwd_txfm_1d_col_cfg_dct_8; + load_buffer_8x8(input, in, stride, 0, 0, row_cfg->shift[0]); + fdct8x8_sse4_1(in, out, col_cfg->cos_bit[2]); + col_txfm_8x8_rounding(out, -row_cfg->shift[1]); transpose_8x8(out, in); - fadst8x8_sse4_1(in, out, cfg->cos_bit_row[2]); + fadst8x8_sse4_1(in, out, row_cfg->cos_bit[2]); transpose_8x8(out, in); write_buffer_8x8(in, coeff); break; case ADST_ADST: - cfg = &fwd_txfm_2d_cfg_adst_adst_8; - load_buffer_8x8(input, in, stride, 0, 0, cfg->shift[0]); - fadst8x8_sse4_1(in, out, cfg->cos_bit_col[2]); - col_txfm_8x8_rounding(out, -cfg->shift[1]); + row_cfg = &fwd_txfm_1d_row_cfg_adst_8; + col_cfg = &fwd_txfm_1d_col_cfg_adst_8; + load_buffer_8x8(input, in, stride, 0, 0, row_cfg->shift[0]); + fadst8x8_sse4_1(in, out, col_cfg->cos_bit[2]); + col_txfm_8x8_rounding(out, -row_cfg->shift[1]); transpose_8x8(out, in); - fadst8x8_sse4_1(in, out, cfg->cos_bit_row[2]); + fadst8x8_sse4_1(in, out, row_cfg->cos_bit[2]); transpose_8x8(out, in); write_buffer_8x8(in, coeff); break; #if CONFIG_EXT_TX case FLIPADST_DCT: - cfg = 
&fwd_txfm_2d_cfg_adst_dct_8; - load_buffer_8x8(input, in, stride, 1, 0, cfg->shift[0]); - fadst8x8_sse4_1(in, out, cfg->cos_bit_col[2]); - col_txfm_8x8_rounding(out, -cfg->shift[1]); + row_cfg = &fwd_txfm_1d_row_cfg_dct_8; + col_cfg = &fwd_txfm_1d_col_cfg_adst_8; + load_buffer_8x8(input, in, stride, 1, 0, row_cfg->shift[0]); + fadst8x8_sse4_1(in, out, col_cfg->cos_bit[2]); + col_txfm_8x8_rounding(out, -row_cfg->shift[1]); transpose_8x8(out, in); - fdct8x8_sse4_1(in, out, cfg->cos_bit_row[2]); + fdct8x8_sse4_1(in, out, row_cfg->cos_bit[2]); transpose_8x8(out, in); write_buffer_8x8(in, coeff); break; case DCT_FLIPADST: - cfg = &fwd_txfm_2d_cfg_dct_adst_8; - load_buffer_8x8(input, in, stride, 0, 1, cfg->shift[0]); - fdct8x8_sse4_1(in, out, cfg->cos_bit_col[2]); - col_txfm_8x8_rounding(out, -cfg->shift[1]); + row_cfg = &fwd_txfm_1d_row_cfg_adst_8; + col_cfg = &fwd_txfm_1d_col_cfg_dct_8; + load_buffer_8x8(input, in, stride, 0, 1, row_cfg->shift[0]); + fdct8x8_sse4_1(in, out, col_cfg->cos_bit[2]); + col_txfm_8x8_rounding(out, -row_cfg->shift[1]); transpose_8x8(out, in); - fadst8x8_sse4_1(in, out, cfg->cos_bit_row[2]); + fadst8x8_sse4_1(in, out, row_cfg->cos_bit[2]); transpose_8x8(out, in); write_buffer_8x8(in, coeff); break; case FLIPADST_FLIPADST: - cfg = &fwd_txfm_2d_cfg_adst_adst_8; - load_buffer_8x8(input, in, stride, 1, 1, cfg->shift[0]); - fadst8x8_sse4_1(in, out, cfg->cos_bit_col[2]); - col_txfm_8x8_rounding(out, -cfg->shift[1]); + row_cfg = &fwd_txfm_1d_row_cfg_adst_8; + col_cfg = &fwd_txfm_1d_col_cfg_adst_8; + load_buffer_8x8(input, in, stride, 1, 1, row_cfg->shift[0]); + fadst8x8_sse4_1(in, out, col_cfg->cos_bit[2]); + col_txfm_8x8_rounding(out, -row_cfg->shift[1]); transpose_8x8(out, in); - fadst8x8_sse4_1(in, out, cfg->cos_bit_row[2]); + fadst8x8_sse4_1(in, out, row_cfg->cos_bit[2]); transpose_8x8(out, in); write_buffer_8x8(in, coeff); break; case ADST_FLIPADST: - cfg = &fwd_txfm_2d_cfg_adst_adst_8; - load_buffer_8x8(input, in, stride, 0, 1, cfg->shift[0]); - fadst8x8_sse4_1(in, out, cfg->cos_bit_col[2]); - col_txfm_8x8_rounding(out, -cfg->shift[1]); + row_cfg = &fwd_txfm_1d_row_cfg_adst_8; + col_cfg = &fwd_txfm_1d_col_cfg_adst_8; + load_buffer_8x8(input, in, stride, 0, 1, row_cfg->shift[0]); + fadst8x8_sse4_1(in, out, col_cfg->cos_bit[2]); + col_txfm_8x8_rounding(out, -row_cfg->shift[1]); transpose_8x8(out, in); - fadst8x8_sse4_1(in, out, cfg->cos_bit_row[2]); + fadst8x8_sse4_1(in, out, row_cfg->cos_bit[2]); transpose_8x8(out, in); write_buffer_8x8(in, coeff); break; case FLIPADST_ADST: - cfg = &fwd_txfm_2d_cfg_adst_adst_8; - load_buffer_8x8(input, in, stride, 1, 0, cfg->shift[0]); - fadst8x8_sse4_1(in, out, cfg->cos_bit_col[2]); - col_txfm_8x8_rounding(out, -cfg->shift[1]); + row_cfg = &fwd_txfm_1d_row_cfg_adst_8; + col_cfg = &fwd_txfm_1d_col_cfg_adst_8; + load_buffer_8x8(input, in, stride, 1, 0, row_cfg->shift[0]); + fadst8x8_sse4_1(in, out, col_cfg->cos_bit[2]); + col_txfm_8x8_rounding(out, -row_cfg->shift[1]); transpose_8x8(out, in); - fadst8x8_sse4_1(in, out, cfg->cos_bit_row[2]); + fadst8x8_sse4_1(in, out, row_cfg->cos_bit[2]); transpose_8x8(out, in); write_buffer_8x8(in, coeff); break; @@ -1107,7 +1127,7 @@ static INLINE void load_buffer_16x16(const int16_t *input, __m128i *out, } static void fdct16x16_sse4_1(__m128i *in, __m128i *out, int bit) { - const int32_t *cospi = cospi_arr[bit - cos_bit_min]; + const int32_t *cospi = cospi_arr(bit); const __m128i cospi32 = _mm_set1_epi32(cospi[32]); const __m128i cospim32 = _mm_set1_epi32(-cospi[32]); const __m128i cospi48 = 
_mm_set1_epi32(cospi[48]); @@ -1393,7 +1413,7 @@ static void fdct16x16_sse4_1(__m128i *in, __m128i *out, int bit) { } static void fadst16x16_sse4_1(__m128i *in, __m128i *out, int bit) { - const int32_t *cospi = cospi_arr[bit - cos_bit_min]; + const int32_t *cospi = cospi_arr(bit); const __m128i cospi2 = _mm_set1_epi32(cospi[2]); const __m128i cospi62 = _mm_set1_epi32(cospi[62]); const __m128i cospi10 = _mm_set1_epi32(cospi[10]); @@ -1794,97 +1814,107 @@ static void write_buffer_16x16(const __m128i *in, tran_low_t *output) { void av1_fwd_txfm2d_16x16_sse4_1(const int16_t *input, int32_t *coeff, int stride, int tx_type, int bd) { __m128i in[64], out[64]; - const TXFM_2D_CFG *cfg = NULL; + const TXFM_1D_CFG *row_cfg = NULL; + const TXFM_1D_CFG *col_cfg = NULL; switch (tx_type) { case DCT_DCT: - cfg = &fwd_txfm_2d_cfg_dct_dct_16; - load_buffer_16x16(input, in, stride, 0, 0, cfg->shift[0]); - fdct16x16_sse4_1(in, out, cfg->cos_bit_col[0]); - col_txfm_16x16_rounding(out, -cfg->shift[1]); + row_cfg = &fwd_txfm_1d_row_cfg_dct_16; + col_cfg = &fwd_txfm_1d_col_cfg_dct_16; + load_buffer_16x16(input, in, stride, 0, 0, row_cfg->shift[0]); + fdct16x16_sse4_1(in, out, col_cfg->cos_bit[0]); + col_txfm_16x16_rounding(out, -row_cfg->shift[1]); transpose_16x16(out, in); - fdct16x16_sse4_1(in, out, cfg->cos_bit_row[0]); + fdct16x16_sse4_1(in, out, row_cfg->cos_bit[0]); transpose_16x16(out, in); write_buffer_16x16(in, coeff); break; case ADST_DCT: - cfg = &fwd_txfm_2d_cfg_adst_dct_16; - load_buffer_16x16(input, in, stride, 0, 0, cfg->shift[0]); - fadst16x16_sse4_1(in, out, cfg->cos_bit_col[0]); - col_txfm_16x16_rounding(out, -cfg->shift[1]); + row_cfg = &fwd_txfm_1d_row_cfg_dct_16; + col_cfg = &fwd_txfm_1d_col_cfg_adst_16; + load_buffer_16x16(input, in, stride, 0, 0, row_cfg->shift[0]); + fadst16x16_sse4_1(in, out, col_cfg->cos_bit[0]); + col_txfm_16x16_rounding(out, -row_cfg->shift[1]); transpose_16x16(out, in); - fdct16x16_sse4_1(in, out, cfg->cos_bit_row[0]); + fdct16x16_sse4_1(in, out, row_cfg->cos_bit[0]); transpose_16x16(out, in); write_buffer_16x16(in, coeff); break; case DCT_ADST: - cfg = &fwd_txfm_2d_cfg_dct_adst_16; - load_buffer_16x16(input, in, stride, 0, 0, cfg->shift[0]); - fdct16x16_sse4_1(in, out, cfg->cos_bit_col[0]); - col_txfm_16x16_rounding(out, -cfg->shift[1]); + row_cfg = &fwd_txfm_1d_row_cfg_adst_16; + col_cfg = &fwd_txfm_1d_col_cfg_dct_16; + load_buffer_16x16(input, in, stride, 0, 0, row_cfg->shift[0]); + fdct16x16_sse4_1(in, out, col_cfg->cos_bit[0]); + col_txfm_16x16_rounding(out, -row_cfg->shift[1]); transpose_16x16(out, in); - fadst16x16_sse4_1(in, out, cfg->cos_bit_row[0]); + fadst16x16_sse4_1(in, out, row_cfg->cos_bit[0]); transpose_16x16(out, in); write_buffer_16x16(in, coeff); break; case ADST_ADST: - cfg = &fwd_txfm_2d_cfg_adst_adst_16; - load_buffer_16x16(input, in, stride, 0, 0, cfg->shift[0]); - fadst16x16_sse4_1(in, out, cfg->cos_bit_col[0]); - col_txfm_16x16_rounding(out, -cfg->shift[1]); + row_cfg = &fwd_txfm_1d_row_cfg_adst_16; + col_cfg = &fwd_txfm_1d_col_cfg_adst_16; + load_buffer_16x16(input, in, stride, 0, 0, row_cfg->shift[0]); + fadst16x16_sse4_1(in, out, col_cfg->cos_bit[0]); + col_txfm_16x16_rounding(out, -row_cfg->shift[1]); transpose_16x16(out, in); - fadst16x16_sse4_1(in, out, cfg->cos_bit_row[0]); + fadst16x16_sse4_1(in, out, row_cfg->cos_bit[0]); transpose_16x16(out, in); write_buffer_16x16(in, coeff); break; #if CONFIG_EXT_TX case FLIPADST_DCT: - cfg = &fwd_txfm_2d_cfg_adst_dct_16; - load_buffer_16x16(input, in, stride, 1, 0, cfg->shift[0]); - 
fadst16x16_sse4_1(in, out, cfg->cos_bit_col[0]); - col_txfm_16x16_rounding(out, -cfg->shift[1]); + row_cfg = &fwd_txfm_1d_row_cfg_dct_16; + col_cfg = &fwd_txfm_1d_col_cfg_adst_16; + load_buffer_16x16(input, in, stride, 1, 0, row_cfg->shift[0]); + fadst16x16_sse4_1(in, out, col_cfg->cos_bit[0]); + col_txfm_16x16_rounding(out, -row_cfg->shift[1]); transpose_16x16(out, in); - fdct16x16_sse4_1(in, out, cfg->cos_bit_row[0]); + fdct16x16_sse4_1(in, out, row_cfg->cos_bit[0]); transpose_16x16(out, in); write_buffer_16x16(in, coeff); break; case DCT_FLIPADST: - cfg = &fwd_txfm_2d_cfg_dct_adst_16; - load_buffer_16x16(input, in, stride, 0, 1, cfg->shift[0]); - fdct16x16_sse4_1(in, out, cfg->cos_bit_col[0]); - col_txfm_16x16_rounding(out, -cfg->shift[1]); + row_cfg = &fwd_txfm_1d_row_cfg_adst_16; + col_cfg = &fwd_txfm_1d_col_cfg_dct_16; + load_buffer_16x16(input, in, stride, 0, 1, row_cfg->shift[0]); + fdct16x16_sse4_1(in, out, col_cfg->cos_bit[0]); + col_txfm_16x16_rounding(out, -row_cfg->shift[1]); transpose_16x16(out, in); - fadst16x16_sse4_1(in, out, cfg->cos_bit_row[0]); + fadst16x16_sse4_1(in, out, row_cfg->cos_bit[0]); transpose_16x16(out, in); write_buffer_16x16(in, coeff); break; case FLIPADST_FLIPADST: - cfg = &fwd_txfm_2d_cfg_adst_adst_16; - load_buffer_16x16(input, in, stride, 1, 1, cfg->shift[0]); - fadst16x16_sse4_1(in, out, cfg->cos_bit_col[0]); - col_txfm_16x16_rounding(out, -cfg->shift[1]); + row_cfg = &fwd_txfm_1d_row_cfg_adst_16; + col_cfg = &fwd_txfm_1d_col_cfg_adst_16; + load_buffer_16x16(input, in, stride, 1, 1, row_cfg->shift[0]); + fadst16x16_sse4_1(in, out, col_cfg->cos_bit[0]); + col_txfm_16x16_rounding(out, -row_cfg->shift[1]); transpose_16x16(out, in); - fadst16x16_sse4_1(in, out, cfg->cos_bit_row[0]); + fadst16x16_sse4_1(in, out, row_cfg->cos_bit[0]); transpose_16x16(out, in); write_buffer_16x16(in, coeff); break; case ADST_FLIPADST: - cfg = &fwd_txfm_2d_cfg_adst_adst_16; - load_buffer_16x16(input, in, stride, 0, 1, cfg->shift[0]); - fadst16x16_sse4_1(in, out, cfg->cos_bit_col[0]); - col_txfm_16x16_rounding(out, -cfg->shift[1]); + row_cfg = &fwd_txfm_1d_row_cfg_adst_16; + col_cfg = &fwd_txfm_1d_col_cfg_adst_16; + load_buffer_16x16(input, in, stride, 0, 1, row_cfg->shift[0]); + fadst16x16_sse4_1(in, out, col_cfg->cos_bit[0]); + col_txfm_16x16_rounding(out, -row_cfg->shift[1]); transpose_16x16(out, in); - fadst16x16_sse4_1(in, out, cfg->cos_bit_row[0]); + fadst16x16_sse4_1(in, out, row_cfg->cos_bit[0]); transpose_16x16(out, in); write_buffer_16x16(in, coeff); break; case FLIPADST_ADST: - cfg = &fwd_txfm_2d_cfg_adst_adst_16; - load_buffer_16x16(input, in, stride, 1, 0, cfg->shift[0]); - fadst16x16_sse4_1(in, out, cfg->cos_bit_col[0]); - col_txfm_16x16_rounding(out, -cfg->shift[1]); + row_cfg = &fwd_txfm_1d_row_cfg_adst_16; + col_cfg = &fwd_txfm_1d_col_cfg_adst_16; + load_buffer_16x16(input, in, stride, 1, 0, row_cfg->shift[0]); + fadst16x16_sse4_1(in, out, col_cfg->cos_bit[0]); + col_txfm_16x16_rounding(out, -row_cfg->shift[1]); transpose_16x16(out, in); - fadst16x16_sse4_1(in, out, cfg->cos_bit_row[0]); + fadst16x16_sse4_1(in, out, row_cfg->cos_bit[0]); transpose_16x16(out, in); write_buffer_16x16(in, coeff); break; diff --git a/third_party/aom/av1/encoder/x86/hybrid_fwd_txfm_avx2.c b/third_party/aom/av1/encoder/x86/hybrid_fwd_txfm_avx2.c index 198e4e4c4..8495ad1aa 100644 --- a/third_party/aom/av1/encoder/x86/hybrid_fwd_txfm_avx2.c +++ b/third_party/aom/av1/encoder/x86/hybrid_fwd_txfm_avx2.c @@ -269,8 +269,8 @@ static void fdct16_avx2(__m256i *in) { x0 = 
_mm256_unpacklo_epi16(v0, v1); x1 = _mm256_unpackhi_epi16(v0, v1); - t0 = butter_fly(x0, x1, cospi_p16_p16); - t1 = butter_fly(x0, x1, cospi_p16_m16); + t0 = butter_fly(&x0, &x1, &cospi_p16_p16); + t1 = butter_fly(&x0, &x1, &cospi_p16_m16); // 4, 12 v0 = _mm256_sub_epi16(s1, s2); @@ -279,8 +279,8 @@ static void fdct16_avx2(__m256i *in) { x0 = _mm256_unpacklo_epi16(v0, v1); x1 = _mm256_unpackhi_epi16(v0, v1); - t2 = butter_fly(x0, x1, cospi_p24_p08); - t3 = butter_fly(x0, x1, cospi_m08_p24); + t2 = butter_fly(&x0, &x1, &cospi_p24_p08); + t3 = butter_fly(&x0, &x1, &cospi_m08_p24); // 2, 6, 10, 14 s0 = _mm256_sub_epi16(u3, u4); @@ -294,8 +294,8 @@ static void fdct16_avx2(__m256i *in) { x0 = _mm256_unpacklo_epi16(s2, s1); x1 = _mm256_unpackhi_epi16(s2, s1); - v2 = butter_fly(x0, x1, cospi_p16_p16); // output[5] - v1 = butter_fly(x0, x1, cospi_p16_m16); // output[6] + v2 = butter_fly(&x0, &x1, &cospi_p16_p16); // output[5] + v1 = butter_fly(&x0, &x1, &cospi_p16_m16); // output[6] s0 = _mm256_add_epi16(v0, v1); // step[4] s1 = _mm256_sub_epi16(v0, v1); // step[5] @@ -306,14 +306,14 @@ static void fdct16_avx2(__m256i *in) { x0 = _mm256_unpacklo_epi16(s0, s3); x1 = _mm256_unpackhi_epi16(s0, s3); - t4 = butter_fly(x0, x1, cospi_p28_p04); - t5 = butter_fly(x0, x1, cospi_m04_p28); + t4 = butter_fly(&x0, &x1, &cospi_p28_p04); + t5 = butter_fly(&x0, &x1, &cospi_m04_p28); // 10, 6 x0 = _mm256_unpacklo_epi16(s1, s2); x1 = _mm256_unpackhi_epi16(s1, s2); - t6 = butter_fly(x0, x1, cospi_p12_p20); - t7 = butter_fly(x0, x1, cospi_m20_p12); + t6 = butter_fly(&x0, &x1, &cospi_p12_p20); + t7 = butter_fly(&x0, &x1, &cospi_m20_p12); // 1, 3, 5, 7, 9, 11, 13, 15 s0 = _mm256_sub_epi16(in[7], in[8]); // step[8] @@ -337,14 +337,14 @@ static void fdct16_avx2(__m256i *in) { x0 = _mm256_unpacklo_epi16(u5, u2); x1 = _mm256_unpackhi_epi16(u5, u2); - s2 = butter_fly(x0, x1, cospi_p16_p16); // step[13] - s5 = butter_fly(x0, x1, cospi_p16_m16); // step[10] + s2 = butter_fly(&x0, &x1, &cospi_p16_p16); // step[13] + s5 = butter_fly(&x0, &x1, &cospi_p16_m16); // step[10] x0 = _mm256_unpacklo_epi16(u4, u3); x1 = _mm256_unpackhi_epi16(u4, u3); - s3 = butter_fly(x0, x1, cospi_p16_p16); // step[12] - s4 = butter_fly(x0, x1, cospi_p16_m16); // step[11] + s3 = butter_fly(&x0, &x1, &cospi_p16_p16); // step[12] + s4 = butter_fly(&x0, &x1, &cospi_p16_m16); // step[11] u0 = _mm256_add_epi16(s0, s4); // output[8] u1 = _mm256_add_epi16(s1, s5); @@ -364,14 +364,14 @@ static void fdct16_avx2(__m256i *in) { x0 = _mm256_unpacklo_epi16(u1, u6); x1 = _mm256_unpackhi_epi16(u1, u6); - s1 = butter_fly(x0, x1, cospi_m08_p24); - s6 = butter_fly(x0, x1, cospi_p24_p08); + s1 = butter_fly(&x0, &x1, &cospi_m08_p24); + s6 = butter_fly(&x0, &x1, &cospi_p24_p08); x0 = _mm256_unpacklo_epi16(u2, u5); x1 = _mm256_unpackhi_epi16(u2, u5); - s2 = butter_fly(x0, x1, cospi_m24_m08); - s5 = butter_fly(x0, x1, cospi_m08_p24); + s2 = butter_fly(&x0, &x1, &cospi_m24_m08); + s5 = butter_fly(&x0, &x1, &cospi_m08_p24); // stage 5 u0 = _mm256_add_epi16(s0, s1); @@ -386,23 +386,23 @@ static void fdct16_avx2(__m256i *in) { // stage 6 x0 = _mm256_unpacklo_epi16(u0, u7); x1 = _mm256_unpackhi_epi16(u0, u7); - in[1] = butter_fly(x0, x1, cospi_p30_p02); - in[15] = butter_fly(x0, x1, cospi_m02_p30); + in[1] = butter_fly(&x0, &x1, &cospi_p30_p02); + in[15] = butter_fly(&x0, &x1, &cospi_m02_p30); x0 = _mm256_unpacklo_epi16(u1, u6); x1 = _mm256_unpackhi_epi16(u1, u6); - in[9] = butter_fly(x0, x1, cospi_p14_p18); - in[7] = butter_fly(x0, x1, cospi_m18_p14); + in[9] = butter_fly(&x0, 
&x1, &cospi_p14_p18); + in[7] = butter_fly(&x0, &x1, &cospi_m18_p14); x0 = _mm256_unpacklo_epi16(u2, u5); x1 = _mm256_unpackhi_epi16(u2, u5); - in[5] = butter_fly(x0, x1, cospi_p22_p10); - in[11] = butter_fly(x0, x1, cospi_m10_p22); + in[5] = butter_fly(&x0, &x1, &cospi_p22_p10); + in[11] = butter_fly(&x0, &x1, &cospi_m10_p22); x0 = _mm256_unpacklo_epi16(u3, u4); x1 = _mm256_unpackhi_epi16(u3, u4); - in[13] = butter_fly(x0, x1, cospi_p06_p26); - in[3] = butter_fly(x0, x1, cospi_m26_p06); + in[13] = butter_fly(&x0, &x1, &cospi_p06_p26); + in[3] = butter_fly(&x0, &x1, &cospi_m26_p06); } void fadst16_avx2(__m256i *in) { @@ -953,7 +953,9 @@ void fadst16_avx2(__m256i *in) { } #if CONFIG_EXT_TX -static void fidtx16_avx2(__m256i *in) { txfm_scaling16_avx2(Sqrt2, in); } +static void fidtx16_avx2(__m256i *in) { + txfm_scaling16_avx2((int16_t)Sqrt2, in); +} #endif void av1_fht16x16_avx2(const int16_t *input, tran_low_t *output, int stride, @@ -964,28 +966,28 @@ void av1_fht16x16_avx2(const int16_t *input, tran_low_t *output, int stride, case DCT_DCT: load_buffer_16x16(input, stride, 0, 0, in); fdct16_avx2(in); - mm256_transpose_16x16(in); + mm256_transpose_16x16(in, in); right_shift_16x16(in); fdct16_avx2(in); break; case ADST_DCT: load_buffer_16x16(input, stride, 0, 0, in); fadst16_avx2(in); - mm256_transpose_16x16(in); + mm256_transpose_16x16(in, in); right_shift_16x16(in); fdct16_avx2(in); break; case DCT_ADST: load_buffer_16x16(input, stride, 0, 0, in); fdct16_avx2(in); - mm256_transpose_16x16(in); + mm256_transpose_16x16(in, in); right_shift_16x16(in); fadst16_avx2(in); break; case ADST_ADST: load_buffer_16x16(input, stride, 0, 0, in); fadst16_avx2(in); - mm256_transpose_16x16(in); + mm256_transpose_16x16(in, in); right_shift_16x16(in); fadst16_avx2(in); break; @@ -993,91 +995,91 @@ void av1_fht16x16_avx2(const int16_t *input, tran_low_t *output, int stride, case FLIPADST_DCT: load_buffer_16x16(input, stride, 1, 0, in); fadst16_avx2(in); - mm256_transpose_16x16(in); + mm256_transpose_16x16(in, in); right_shift_16x16(in); fdct16_avx2(in); break; case DCT_FLIPADST: load_buffer_16x16(input, stride, 0, 1, in); fdct16_avx2(in); - mm256_transpose_16x16(in); + mm256_transpose_16x16(in, in); right_shift_16x16(in); fadst16_avx2(in); break; case FLIPADST_FLIPADST: load_buffer_16x16(input, stride, 1, 1, in); fadst16_avx2(in); - mm256_transpose_16x16(in); + mm256_transpose_16x16(in, in); right_shift_16x16(in); fadst16_avx2(in); break; case ADST_FLIPADST: load_buffer_16x16(input, stride, 0, 1, in); fadst16_avx2(in); - mm256_transpose_16x16(in); + mm256_transpose_16x16(in, in); right_shift_16x16(in); fadst16_avx2(in); break; case FLIPADST_ADST: load_buffer_16x16(input, stride, 1, 0, in); fadst16_avx2(in); - mm256_transpose_16x16(in); + mm256_transpose_16x16(in, in); right_shift_16x16(in); fadst16_avx2(in); break; case IDTX: load_buffer_16x16(input, stride, 0, 0, in); fidtx16_avx2(in); - mm256_transpose_16x16(in); + mm256_transpose_16x16(in, in); right_shift_16x16(in); fidtx16_avx2(in); break; case V_DCT: load_buffer_16x16(input, stride, 0, 0, in); fdct16_avx2(in); - mm256_transpose_16x16(in); + mm256_transpose_16x16(in, in); right_shift_16x16(in); fidtx16_avx2(in); break; case H_DCT: load_buffer_16x16(input, stride, 0, 0, in); fidtx16_avx2(in); - mm256_transpose_16x16(in); + mm256_transpose_16x16(in, in); right_shift_16x16(in); fdct16_avx2(in); break; case V_ADST: load_buffer_16x16(input, stride, 0, 0, in); fadst16_avx2(in); - mm256_transpose_16x16(in); + mm256_transpose_16x16(in, in); right_shift_16x16(in); 
fidtx16_avx2(in); break; case H_ADST: load_buffer_16x16(input, stride, 0, 0, in); fidtx16_avx2(in); - mm256_transpose_16x16(in); + mm256_transpose_16x16(in, in); right_shift_16x16(in); fadst16_avx2(in); break; case V_FLIPADST: load_buffer_16x16(input, stride, 1, 0, in); fadst16_avx2(in); - mm256_transpose_16x16(in); + mm256_transpose_16x16(in, in); right_shift_16x16(in); fidtx16_avx2(in); break; case H_FLIPADST: load_buffer_16x16(input, stride, 0, 1, in); fidtx16_avx2(in); - mm256_transpose_16x16(in); + mm256_transpose_16x16(in, in); right_shift_16x16(in); fadst16_avx2(in); break; #endif // CONFIG_EXT_TX default: assert(0); break; } - mm256_transpose_16x16(in); + mm256_transpose_16x16(in, in); write_buffer_16x16(in, output); _mm256_zeroupper(); } @@ -1110,10 +1112,10 @@ static void mm256_vectors_swap(__m256i *a0, __m256i *a1, const int size) { } static void mm256_transpose_32x32(__m256i *in0, __m256i *in1) { - mm256_transpose_16x16(in0); - mm256_transpose_16x16(&in0[16]); - mm256_transpose_16x16(in1); - mm256_transpose_16x16(&in1[16]); + mm256_transpose_16x16(in0, in0); + mm256_transpose_16x16(&in0[16], &in0[16]); + mm256_transpose_16x16(in1, in1); + mm256_transpose_16x16(&in1[16], &in1[16]); mm256_vectors_swap(&in0[16], in1, 16); } @@ -1247,23 +1249,23 @@ static void fdct16_odd_avx2(__m256i *in) { u0 = _mm256_unpacklo_epi16(in[4], in[11]); u1 = _mm256_unpackhi_epi16(in[4], in[11]); - y4 = butter_fly(u0, u1, cospi_m16_p16); - y11 = butter_fly(u0, u1, cospi_p16_p16); + y4 = butter_fly(&u0, &u1, &cospi_m16_p16); + y11 = butter_fly(&u0, &u1, &cospi_p16_p16); u0 = _mm256_unpacklo_epi16(in[5], in[10]); u1 = _mm256_unpackhi_epi16(in[5], in[10]); - y5 = butter_fly(u0, u1, cospi_m16_p16); - y10 = butter_fly(u0, u1, cospi_p16_p16); + y5 = butter_fly(&u0, &u1, &cospi_m16_p16); + y10 = butter_fly(&u0, &u1, &cospi_p16_p16); u0 = _mm256_unpacklo_epi16(in[6], in[9]); u1 = _mm256_unpackhi_epi16(in[6], in[9]); - y6 = butter_fly(u0, u1, cospi_m16_p16); - y9 = butter_fly(u0, u1, cospi_p16_p16); + y6 = butter_fly(&u0, &u1, &cospi_m16_p16); + y9 = butter_fly(&u0, &u1, &cospi_p16_p16); u0 = _mm256_unpacklo_epi16(in[7], in[8]); u1 = _mm256_unpackhi_epi16(in[7], in[8]); - y7 = butter_fly(u0, u1, cospi_m16_p16); - y8 = butter_fly(u0, u1, cospi_p16_p16); + y7 = butter_fly(&u0, &u1, &cospi_m16_p16); + y8 = butter_fly(&u0, &u1, &cospi_p16_p16); y12 = in[12]; y13 = in[13]; @@ -1300,23 +1302,23 @@ static void fdct16_odd_avx2(__m256i *in) { u0 = _mm256_unpacklo_epi16(x2, x13); u1 = _mm256_unpackhi_epi16(x2, x13); - y2 = butter_fly(u0, u1, cospi_m08_p24); - y13 = butter_fly(u0, u1, cospi_p24_p08); + y2 = butter_fly(&u0, &u1, &cospi_m08_p24); + y13 = butter_fly(&u0, &u1, &cospi_p24_p08); u0 = _mm256_unpacklo_epi16(x3, x12); u1 = _mm256_unpackhi_epi16(x3, x12); - y3 = butter_fly(u0, u1, cospi_m08_p24); - y12 = butter_fly(u0, u1, cospi_p24_p08); + y3 = butter_fly(&u0, &u1, &cospi_m08_p24); + y12 = butter_fly(&u0, &u1, &cospi_p24_p08); u0 = _mm256_unpacklo_epi16(x4, x11); u1 = _mm256_unpackhi_epi16(x4, x11); - y4 = butter_fly(u0, u1, cospi_m24_m08); - y11 = butter_fly(u0, u1, cospi_m08_p24); + y4 = butter_fly(&u0, &u1, &cospi_m24_m08); + y11 = butter_fly(&u0, &u1, &cospi_m08_p24); u0 = _mm256_unpacklo_epi16(x5, x10); u1 = _mm256_unpackhi_epi16(x5, x10); - y5 = butter_fly(u0, u1, cospi_m24_m08); - y10 = butter_fly(u0, u1, cospi_m08_p24); + y5 = butter_fly(&u0, &u1, &cospi_m24_m08); + y10 = butter_fly(&u0, &u1, &cospi_m08_p24); // stage 5 x0 = _mm256_add_epi16(y0, y3); @@ -1349,23 +1351,23 @@ static void 
fdct16_odd_avx2(__m256i *in) { u0 = _mm256_unpacklo_epi16(x1, x14); u1 = _mm256_unpackhi_epi16(x1, x14); - y1 = butter_fly(u0, u1, cospi_m04_p28); - y14 = butter_fly(u0, u1, cospi_p28_p04); + y1 = butter_fly(&u0, &u1, &cospi_m04_p28); + y14 = butter_fly(&u0, &u1, &cospi_p28_p04); u0 = _mm256_unpacklo_epi16(x2, x13); u1 = _mm256_unpackhi_epi16(x2, x13); - y2 = butter_fly(u0, u1, cospi_m28_m04); - y13 = butter_fly(u0, u1, cospi_m04_p28); + y2 = butter_fly(&u0, &u1, &cospi_m28_m04); + y13 = butter_fly(&u0, &u1, &cospi_m04_p28); u0 = _mm256_unpacklo_epi16(x5, x10); u1 = _mm256_unpackhi_epi16(x5, x10); - y5 = butter_fly(u0, u1, cospi_m20_p12); - y10 = butter_fly(u0, u1, cospi_p12_p20); + y5 = butter_fly(&u0, &u1, &cospi_m20_p12); + y10 = butter_fly(&u0, &u1, &cospi_p12_p20); u0 = _mm256_unpacklo_epi16(x6, x9); u1 = _mm256_unpackhi_epi16(x6, x9); - y6 = butter_fly(u0, u1, cospi_m12_m20); - y9 = butter_fly(u0, u1, cospi_m20_p12); + y6 = butter_fly(&u0, &u1, &cospi_m12_m20); + y9 = butter_fly(&u0, &u1, &cospi_m20_p12); // stage 7 x0 = _mm256_add_epi16(y0, y1); @@ -1389,43 +1391,43 @@ static void fdct16_odd_avx2(__m256i *in) { // stage 8 u0 = _mm256_unpacklo_epi16(x0, x15); u1 = _mm256_unpackhi_epi16(x0, x15); - in[0] = butter_fly(u0, u1, cospi_p31_p01); - in[15] = butter_fly(u0, u1, cospi_m01_p31); + in[0] = butter_fly(&u0, &u1, &cospi_p31_p01); + in[15] = butter_fly(&u0, &u1, &cospi_m01_p31); u0 = _mm256_unpacklo_epi16(x1, x14); u1 = _mm256_unpackhi_epi16(x1, x14); - in[1] = butter_fly(u0, u1, cospi_p15_p17); - in[14] = butter_fly(u0, u1, cospi_m17_p15); + in[1] = butter_fly(&u0, &u1, &cospi_p15_p17); + in[14] = butter_fly(&u0, &u1, &cospi_m17_p15); u0 = _mm256_unpacklo_epi16(x2, x13); u1 = _mm256_unpackhi_epi16(x2, x13); - in[2] = butter_fly(u0, u1, cospi_p23_p09); - in[13] = butter_fly(u0, u1, cospi_m09_p23); + in[2] = butter_fly(&u0, &u1, &cospi_p23_p09); + in[13] = butter_fly(&u0, &u1, &cospi_m09_p23); u0 = _mm256_unpacklo_epi16(x3, x12); u1 = _mm256_unpackhi_epi16(x3, x12); - in[3] = butter_fly(u0, u1, cospi_p07_p25); - in[12] = butter_fly(u0, u1, cospi_m25_p07); + in[3] = butter_fly(&u0, &u1, &cospi_p07_p25); + in[12] = butter_fly(&u0, &u1, &cospi_m25_p07); u0 = _mm256_unpacklo_epi16(x4, x11); u1 = _mm256_unpackhi_epi16(x4, x11); - in[4] = butter_fly(u0, u1, cospi_p27_p05); - in[11] = butter_fly(u0, u1, cospi_m05_p27); + in[4] = butter_fly(&u0, &u1, &cospi_p27_p05); + in[11] = butter_fly(&u0, &u1, &cospi_m05_p27); u0 = _mm256_unpacklo_epi16(x5, x10); u1 = _mm256_unpackhi_epi16(x5, x10); - in[5] = butter_fly(u0, u1, cospi_p11_p21); - in[10] = butter_fly(u0, u1, cospi_m21_p11); + in[5] = butter_fly(&u0, &u1, &cospi_p11_p21); + in[10] = butter_fly(&u0, &u1, &cospi_m21_p11); u0 = _mm256_unpacklo_epi16(x6, x9); u1 = _mm256_unpackhi_epi16(x6, x9); - in[6] = butter_fly(u0, u1, cospi_p19_p13); - in[9] = butter_fly(u0, u1, cospi_m13_p19); + in[6] = butter_fly(&u0, &u1, &cospi_p19_p13); + in[9] = butter_fly(&u0, &u1, &cospi_m13_p19); u0 = _mm256_unpacklo_epi16(x7, x8); u1 = _mm256_unpackhi_epi16(x7, x8); - in[7] = butter_fly(u0, u1, cospi_p03_p29); - in[8] = butter_fly(u0, u1, cospi_m29_p03); + in[7] = butter_fly(&u0, &u1, &cospi_p03_p29); + in[8] = butter_fly(&u0, &u1, &cospi_m29_p03); } static void fdct32_avx2(__m256i *in0, __m256i *in1) { @@ -1464,7 +1466,7 @@ static INLINE void write_buffer_32x32(const __m256i *in0, const __m256i *in1, static void fhalfright32_16col_avx2(__m256i *in) { int i = 0; const __m256i zero = _mm256_setzero_si256(); - const __m256i sqrt2 = _mm256_set1_epi16(Sqrt2); + 
const __m256i sqrt2 = _mm256_set1_epi16((int16_t)Sqrt2); const __m256i dct_rounding = _mm256_set1_epi32(DCT_CONST_ROUNDING); __m256i x0, x1;
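Note on the SSE4.1 hunks above: each transform case now selects a row config and a column config independently (the fwd_txfm_1d_row_cfg_* / fwd_txfm_1d_col_cfg_* pairs) instead of a single fwd_txfm_2d_cfg_* table, and the raw lookup cospi_arr[bit - cos_bit_min] becomes the accessor call cospi_arr(bit). The sketch below only illustrates that row/column dispatch pattern; the TXFM_1D_CFG layout, the short config names, and every numeric value in it are placeholders, not aom's definitions.

#include <stdio.h>

/* Stand-in config: field names mirror the diff (shift, cos_bit), but the
 * layout and the values below are made up for illustration. */
typedef struct {
  const char *name;
  int shift[2];   /* shift[0]: load/pre-scale shift, shift[1]: inter-pass rounding */
  int cos_bit[4]; /* per-stage cosine bit width */
} TXFM_1D_CFG;

static const TXFM_1D_CFG row_cfg_dct_8 = { "row_dct_8", { 2, -1 }, { 13, 13, 13, 13 } };
static const TXFM_1D_CFG col_cfg_adst_8 = { "col_adst_8", { 2, -1 }, { 13, 13, 13, 13 } };

/* The column pass takes its cos_bit from col_cfg, while the load shift and the
 * inter-pass rounding come from row_cfg, matching the row_cfg->shift[0],
 * col_cfg->cos_bit[...], -row_cfg->shift[1] usage in the cases above. */
static void fwd_txfm_8x8_sketch(const TXFM_1D_CFG *row_cfg,
                                const TXFM_1D_CFG *col_cfg) {
  printf("load shift  : %d (%s)\n", row_cfg->shift[0], row_cfg->name);
  printf("col cos_bit : %d (%s)\n", col_cfg->cos_bit[2], col_cfg->name);
  printf("round shift : %d (%s)\n", -row_cfg->shift[1], row_cfg->name);
  printf("row cos_bit : %d (%s)\n", row_cfg->cos_bit[2], row_cfg->name);
}

int main(void) {
  fwd_txfm_8x8_sketch(&row_cfg_dct_8, &col_cfg_adst_8); /* ADST columns, DCT rows */
  return 0;
}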
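In hybrid_fwd_txfm_avx2.c, butter_fly now takes its two data vectors and the packed cospi pair by const pointer rather than by value; passing __m256i by value can hit argument-passing and alignment restrictions on some 32-bit MSVC targets, which is presumably the motivation. The stand-alone version below shows the butterfly the helper performs (multiply-add against a packed cosine pair, round, shift by DCT_CONST_BITS, repack to 16 bits); it is a sketch of the pattern with locally defined constants, not a copy of the aom helper, and butter_fly_sketch / pair_set_epi16_sketch are hypothetical names.

#include <immintrin.h>
#include <stdint.h>
#include <stdio.h>

#define DCT_CONST_BITS 14 /* assumed to match the value used by the library */
#define DCT_CONST_ROUNDING (1 << (DCT_CONST_BITS - 1))

/* a0/a1 hold interleaved 16-bit pairs (as produced by unpacklo/unpackhi),
 * cospi holds the packed cosine pair; const pointers avoid passing 256-bit
 * vectors by value. */
static __m256i butter_fly_sketch(const __m256i *a0, const __m256i *a1,
                                 const __m256i *cospi) {
  const __m256i rounding = _mm256_set1_epi32(DCT_CONST_ROUNDING);
  __m256i lo = _mm256_madd_epi16(*a0, *cospi);
  __m256i hi = _mm256_madd_epi16(*a1, *cospi);
  lo = _mm256_srai_epi32(_mm256_add_epi32(lo, rounding), DCT_CONST_BITS);
  hi = _mm256_srai_epi32(_mm256_add_epi32(hi, rounding), DCT_CONST_BITS);
  return _mm256_packs_epi32(lo, hi);
}

/* Pack the 16-bit pair (a, b) into every 32-bit lane. */
static __m256i pair_set_epi16_sketch(int16_t a, int16_t b) {
  return _mm256_set1_epi32((int32_t)((uint16_t)a | ((uint32_t)(uint16_t)b << 16)));
}

int main(void) {
  const __m256i x0 = _mm256_set1_epi32(3u | (1u << 16)); /* 16-bit pairs (3, 1) */
  const __m256i x1 = x0;
  const __m256i c = pair_set_epi16_sketch(11585, 11585); /* ~cos(pi/4) in Q14 */
  int16_t out[16];
  _mm256_storeu_si256((__m256i *)out, butter_fly_sketch(&x0, &x1, &c));
  printf("%d\n", out[0]); /* (3 * 11585 + 1 * 11585 + 8192) >> 14 = 3 */
  return 0;
}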
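The same hunks show mm256_transpose_16x16 gaining an explicit destination argument (the call sites pass the same buffer twice to keep the old in-place behaviour), and mm256_transpose_32x32 building the larger transpose from four 16x16 tile transposes plus one swap of the two off-diagonal tiles. A plain-C sketch of that tile composition, using ordinary int16_t arrays instead of __m256i registers:

#include <stdint.h>
#include <stdio.h>

/* Transpose one 16x16 tile in place; stride is the row pitch of the parent. */
static void transpose_16x16_tile(int16_t *t, int stride) {
  for (int r = 0; r < 16; ++r) {
    for (int c = r + 1; c < 16; ++c) {
      const int16_t tmp = t[r * stride + c];
      t[r * stride + c] = t[c * stride + r];
      t[c * stride + r] = tmp;
    }
  }
}

/* 32x32 transpose from four 16x16 tile transposes plus a swap of the
 * off-diagonal tiles, mirroring the structure of mm256_transpose_32x32. */
static void transpose_32x32_sketch(int16_t m[32][32]) {
  transpose_16x16_tile(&m[0][0], 32);   /* top-left */
  transpose_16x16_tile(&m[0][16], 32);  /* top-right */
  transpose_16x16_tile(&m[16][0], 32);  /* bottom-left */
  transpose_16x16_tile(&m[16][16], 32); /* bottom-right */
  for (int r = 0; r < 16; ++r) {        /* swap the off-diagonal tiles */
    for (int c = 0; c < 16; ++c) {
      const int16_t tmp = m[r][16 + c];
      m[r][16 + c] = m[16 + r][c];
      m[16 + r][c] = tmp;
    }
  }
}

int main(void) {
  static int16_t m[32][32];
  for (int r = 0; r < 32; ++r)
    for (int c = 0; c < 32; ++c) m[r][c] = (int16_t)(r * 32 + c);
  transpose_32x32_sketch(m);
  printf("%d\n", m[3][20]); /* original m[20][3] = 20 * 32 + 3 = 643 */
  return 0;
}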
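The remaining change in these hunks is the explicit (int16_t) cast on Sqrt2 in fidtx16_avx2 and fhalfright32_16col_avx2. Sqrt2 is declared as a wider integer, but its value is a small fixed-point encoding of sqrt(2), so the cast only silences the implicit-narrowing warning from _mm256_set1_epi16 / txfm_scaling16_avx2 and does not change the value. The toy below uses a hypothetical Q12 constant for illustration; it is not aom's actual Sqrt2 definition or scaling path.

#include <stdint.h>
#include <stdio.h>

#define SQRT2_Q12 5793 /* round(sqrt(2) * 2^12); small enough to cast to int16_t losslessly */

static int16_t scale_by_sqrt2(int16_t v) {
  /* widen, multiply by the Q12 constant, round, shift back to the input scale */
  return (int16_t)(((int32_t)v * (int16_t)SQRT2_Q12 + (1 << 11)) >> 12);
}

int main(void) {
  printf("%d\n", scale_by_sqrt2(100)); /* ~141, i.e. 100 * sqrt(2) */
  return 0;
}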