diff options
Diffstat (limited to 'third_party/aom/av1/encoder/encodeframe.c')
-rw-r--r-- | third_party/aom/av1/encoder/encodeframe.c | 1362 |
1 files changed, 978 insertions, 384 deletions
diff --git a/third_party/aom/av1/encoder/encodeframe.c b/third_party/aom/av1/encoder/encodeframe.c index 36d09c02a..d13eb42fb 100644 --- a/third_party/aom/av1/encoder/encodeframe.c +++ b/third_party/aom/av1/encoder/encodeframe.c @@ -84,7 +84,7 @@ static void predict_superblock(const AV1_COMP *const cpi, ThreadData *td, #if CONFIG_EXT_INTER int mi_row_ori, int mi_col_ori, #endif // CONFIG_EXT_INTER - int mi_row_pred, int mi_col_pred, + int mi_row_pred, int mi_col_pred, int plane, BLOCK_SIZE bsize_pred, int b_sub8x8, int block); static int check_supertx_sb(BLOCK_SIZE bsize, TX_SIZE supertx_size, PC_TREE *pc_tree); @@ -308,7 +308,6 @@ static void set_offsets_without_segment_id(const AV1_COMP *const cpi, av1_setup_src_planes(x, cpi->source, mi_row, mi_col); // R/D setup. - x->rddiv = cpi->rd.RDDIV; x->rdmult = cpi->rd.RDMULT; // required by av1_append_sub8x8_mvs_for_idx() and av1_find_best_ref_mvs() @@ -326,6 +325,10 @@ static void set_offsets(const AV1_COMP *const cpi, const TileInfo *const tile, set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize); mbmi = &xd->mi[0]->mbmi; +#if CONFIG_CFL + xd->cfl->mi_row = mi_row; + xd->cfl->mi_col = mi_col; +#endif // Setup segment ID. if (seg->enabled) { @@ -413,7 +416,6 @@ static void set_offsets_extend(const AV1_COMP *const cpi, ThreadData *td, xd->left_available = (mi_col_ori > tile->mi_col_start); // R/D setup. - x->rddiv = cpi->rd.RDDIV; x->rdmult = cpi->rd.RDMULT; } @@ -539,6 +541,21 @@ static void set_ref_and_pred_mvs(MACROBLOCK *const x, int_mv *const mi_pred_mv, mbmi->pred_mv[1] = this_mv; mi_pred_mv[1] = this_mv; } +#if CONFIG_COMPOUND_SINGLEREF + } else if (is_inter_singleref_comp_mode(mbmi->mode)) { + // Special case: SR_NEAR_NEWMV uses 1 + mbmi->ref_mv_idx + // (like NEARMV) instead + if (mbmi->mode == SR_NEAR_NEWMV) ref_mv_idx += 1; + + if (compound_ref0_mode(mbmi->mode) == NEWMV || + compound_ref1_mode(mbmi->mode) == NEWMV) { + int_mv this_mv = curr_ref_mv_stack[ref_mv_idx].this_mv; + clamp_mv_ref(&this_mv.as_mv, bw, bh, xd); + mbmi_ext->ref_mvs[mbmi->ref_frame[0]][0] = this_mv; + mbmi->pred_mv[0] = this_mv; + mi_pred_mv[0] = this_mv; + } +#endif // CONFIG_COMPOUND_SINGLEREF } else { #endif // CONFIG_EXT_INTER if (mbmi->mode == NEWMV) { @@ -635,7 +652,6 @@ static void update_state(const AV1_COMP *const cpi, ThreadData *td, #if CONFIG_PALETTE for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i]; #endif // CONFIG_PALETTE - // Restore the coding context of the MB to that that was in place // when the mode was picked for it for (y = 0; y < mi_height; y++) @@ -814,7 +830,6 @@ static void update_state_supertx(const AV1_COMP *const cpi, ThreadData *td, } mi_addr->mbmi.segment_id_supertx = MAX_SEGMENTS; } - // Restore the coding context of the MB to that that was in place // when the mode was picked for it for (y = 0; y < mi_height; y++) @@ -1147,7 +1162,7 @@ static void update_supertx_param_sb(const AV1_COMP *const cpi, ThreadData *td, } #endif // CONFIG_SUPERTX -#if CONFIG_MOTION_VAR && CONFIG_NCOBMC +#if CONFIG_MOTION_VAR && (CONFIG_NCOBMC || CONFIG_NCOBMC_ADAPT_WEIGHT) static void set_mode_info_b(const AV1_COMP *const cpi, const TileInfo *const tile, ThreadData *td, int mi_row, int mi_col, BLOCK_SIZE bsize, @@ -1167,6 +1182,7 @@ static void set_mode_info_sb(const AV1_COMP *const cpi, ThreadData *td, BLOCK_SIZE subsize = get_subsize(bsize, partition); #if CONFIG_EXT_PARTITION_TYPES const BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT); + const int quarter_step = mi_size_wide[bsize] / 4; #endif #if CONFIG_CB4X4 const int unify_bsize = 1; @@ -1245,6 +1261,24 @@ static void set_mode_info_sb(const AV1_COMP *const cpi, ThreadData *td, set_mode_info_b(cpi, tile, td, mi_row + hbs, mi_col + hbs, bsize2, &pc_tree->verticalb[2]); break; + case PARTITION_HORZ_4: + for (int i = 0; i < 4; ++i) { + int this_mi_row = mi_row + i * quarter_step; + if (i > 0 && this_mi_row >= cm->mi_rows) break; + + set_mode_info_b(cpi, tile, td, this_mi_row, mi_col, subsize, + &pc_tree->horizontal4[i]); + } + break; + case PARTITION_VERT_4: + for (int i = 0; i < 4; ++i) { + int this_mi_col = mi_col + i * quarter_step; + if (i > 0 && this_mi_col >= cm->mi_cols) break; + + set_mode_info_b(cpi, tile, td, mi_row, this_mi_col, subsize, + &pc_tree->vertical4[i]); + } + break; #endif // CONFIG_EXT_PARTITION_TYPES default: assert(0 && "Invalid partition type."); break; } @@ -1281,10 +1315,10 @@ static int set_segment_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x, return av1_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q); } -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 -static void daala_dist_set_sub8x8_dst(MACROBLOCK *const x, uint8_t *dst8x8, - BLOCK_SIZE bsize, int bw, int bh, - int mi_row, int mi_col) { +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 +static void dist_8x8_set_sub8x8_dst(MACROBLOCK *const x, uint8_t *dst8x8, + BLOCK_SIZE bsize, int bw, int bh, + int mi_row, int mi_col) { MACROBLOCKD *const xd = &x->e_mbd; struct macroblockd_plane *const pd = &xd->plane[0]; const int dst_stride = pd->dst.stride; @@ -1294,12 +1328,24 @@ static void daala_dist_set_sub8x8_dst(MACROBLOCK *const x, uint8_t *dst8x8, if (bsize < BLOCK_8X8) { int i, j; - uint8_t *dst_sub8x8 = &dst8x8[((mi_row & 1) * 8 + (mi_col & 1)) << 2]; +#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + uint16_t *dst8x8_16 = (uint16_t *)dst8x8; + uint16_t *dst_sub8x8 = &dst8x8_16[((mi_row & 1) * 8 + (mi_col & 1)) << 2]; - for (j = 0; j < bh; ++j) - for (i = 0; i < bw; ++i) { - dst_sub8x8[j * 8 + i] = dst[j * dst_stride + i]; - } + for (j = 0; j < bh; ++j) + for (i = 0; i < bw; ++i) + dst_sub8x8[j * 8 + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i]; + } else { +#endif + uint8_t *dst_sub8x8 = &dst8x8[((mi_row & 1) * 8 + (mi_col & 1)) << 2]; + + for (j = 0; j < bh; ++j) + for (i = 0; i < bw; ++i) + dst_sub8x8[j * 8 + i] = dst[j * dst_stride + i]; +#if CONFIG_HIGHBITDEPTH + } +#endif } } #endif @@ -1330,10 +1376,6 @@ static void rd_pick_sb_modes(const AV1_COMP *const cpi, TileDataEnc *tile_data, x->pvq_speed = 1; x->pvq_coded = 0; #endif -#if CONFIG_CFL - // Don't store luma during RDO (we will store the best mode later). - x->cfl_store_y = 0; -#endif set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); mbmi = &xd->mi[0]->mbmi; @@ -1342,6 +1384,10 @@ static void rd_pick_sb_modes(const AV1_COMP *const cpi, TileDataEnc *tile_data, mbmi->mi_row = mi_row; mbmi->mi_col = mi_col; #endif +#if CONFIG_CFL + // Don't store luma during RDO. Only store luma when best luma is known + x->cfl_store_y = 0; +#endif #if CONFIG_SUPERTX // We set tx_size here as skip blocks would otherwise not set it. // tx_size needs to be set at this point as supertx_enable in @@ -1542,6 +1588,7 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row, #endif if (!frame_is_intra_only(cm)) { FRAME_COUNTS *const counts = td->counts; + RD_COUNTS *rdc = &td->rd_counts; const int inter_block = is_inter_block(mbmi); const int seg_ref_active = segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME); @@ -1560,6 +1607,12 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row, #endif // CONFIG_EXT_REFS if (cm->reference_mode == REFERENCE_MODE_SELECT) { + if (has_second_ref(mbmi)) + // This flag is also updated for 4x4 blocks + rdc->compound_ref_used_flag = 1; + else + // This flag is also updated for 4x4 blocks + rdc->single_ref_used_flag = 1; #if !SUB8X8_COMP_REF if (mbmi->sb_type != BLOCK_4X4) counts->comp_inter[av1_get_reference_mode_context(cm, xd)] @@ -1571,24 +1624,53 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row, } if (has_second_ref(mbmi)) { +#if CONFIG_EXT_COMP_REFS + const COMP_REFERENCE_TYPE comp_ref_type = has_uni_comp_refs(mbmi) + ? UNIDIR_COMP_REFERENCE + : BIDIR_COMP_REFERENCE; +#if !USE_UNI_COMP_REFS + // TODO(zoeliu): Temporarily turn off uni-directional comp refs + assert(comp_ref_type == BIDIR_COMP_REFERENCE); +#endif // !USE_UNI_COMP_REFS + counts->comp_ref_type[av1_get_comp_reference_type_context(xd)] + [comp_ref_type]++; + + if (comp_ref_type == UNIDIR_COMP_REFERENCE) { + const int bit = (ref0 == BWDREF_FRAME); + counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p(xd)][0] + [bit]++; + if (!bit) { + const int bit1 = (ref1 == LAST3_FRAME || ref1 == GOLDEN_FRAME); + counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p1(xd)][1] + [bit1]++; + if (bit1) { + counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p2(xd)] + [2][ref1 == GOLDEN_FRAME]++; + } + } + } else { +#endif // CONFIG_EXT_COMP_REFS #if CONFIG_EXT_REFS - const int bit = (ref0 == GOLDEN_FRAME || ref0 == LAST3_FRAME); + const int bit = (ref0 == GOLDEN_FRAME || ref0 == LAST3_FRAME); - counts->comp_ref[av1_get_pred_context_comp_ref_p(cm, xd)][0][bit]++; - if (!bit) { - counts->comp_ref[av1_get_pred_context_comp_ref_p1(cm, xd)][1] - [ref0 == LAST_FRAME]++; - } else { - counts->comp_ref[av1_get_pred_context_comp_ref_p2(cm, xd)][2] - [ref0 == GOLDEN_FRAME]++; - } + counts->comp_ref[av1_get_pred_context_comp_ref_p(cm, xd)][0][bit]++; + if (!bit) { + counts->comp_ref[av1_get_pred_context_comp_ref_p1(cm, xd)][1] + [ref0 == LAST_FRAME]++; + } else { + counts->comp_ref[av1_get_pred_context_comp_ref_p2(cm, xd)][2] + [ref0 == GOLDEN_FRAME]++; + } - counts->comp_bwdref[av1_get_pred_context_comp_bwdref_p(cm, xd)][0] - [ref1 == ALTREF_FRAME]++; -#else + counts->comp_bwdref[av1_get_pred_context_comp_bwdref_p(cm, xd)][0] + [ref1 == ALTREF_FRAME]++; +#else // !CONFIG_EXT_REFS counts->comp_ref[av1_get_pred_context_comp_ref_p(cm, xd)][0] [ref0 == GOLDEN_FRAME]++; #endif // CONFIG_EXT_REFS +#if CONFIG_EXT_COMP_REFS + } +#endif // CONFIG_EXT_COMP_REFS } else { #if CONFIG_EXT_REFS const int bit = (ref0 == ALTREF_FRAME || ref0 == BWDREF_FRAME); @@ -1609,7 +1691,7 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row, [ref0 != LAST3_FRAME]++; } } -#else +#else // !CONFIG_EXT_REFS counts->single_ref[av1_get_pred_context_single_ref_p1(xd)][0] [ref0 != LAST_FRAME]++; if (ref0 != LAST_FRAME) { @@ -1619,7 +1701,14 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row, #endif // CONFIG_EXT_REFS } -#if CONFIG_EXT_INTER && CONFIG_INTERINTRA +#if CONFIG_EXT_INTER +#if CONFIG_COMPOUND_SINGLEREF + if (!has_second_ref(mbmi)) + counts->comp_inter_mode[av1_get_inter_mode_context(xd)] + [is_inter_singleref_comp_mode(mbmi->mode)]++; +#endif // CONFIG_COMPOUND_SINGLEREF + +#if CONFIG_INTERINTRA if (cm->reference_mode != COMPOUND_REFERENCE && #if CONFIG_SUPERTX !supertx_enabled && @@ -1635,14 +1724,33 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row, counts->interintra[bsize_group][0]++; } } -#endif // CONFIG_EXT_INTER && CONFIG_INTERINTRA +#endif // CONFIG_INTERINTRA +#endif // CONFIG_EXT_INTER #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION +#if CONFIG_WARPED_MOTION + set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); +#endif +#if CONFIG_NCOBMC_ADAPT_WEIGHT + const MOTION_MODE motion_allowed = + motion_mode_allowed_wrapper(0, +#if CONFIG_GLOBAL_MOTION + 0, xd->global_motion, +#endif // CONFIG_GLOBAL_MOTION +#if CONFIG_WARPED_MOTION + xd, +#endif + mi); +#else const MOTION_MODE motion_allowed = motion_mode_allowed( -#if CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION +#if CONFIG_GLOBAL_MOTION 0, xd->global_motion, -#endif // CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION +#endif // CONFIG_GLOBAL_MOTION +#if CONFIG_WARPED_MOTION + xd, +#endif mi); +#endif // CONFIG_NCOBMC_ADAPT_WEIGHT #if CONFIG_SUPERTX if (!supertx_enabled) #endif // CONFIG_SUPERTX @@ -1660,11 +1768,28 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row, if (motion_allowed > SIMPLE_TRANSLATION) counts->motion_mode[mbmi->sb_type][mbmi->motion_mode]++; #endif // CONFIG_MOTION_VAR && CONFIG_WARPED_MOTION + +#if CONFIG_NCOBMC_ADAPT_WEIGHT + if (mbmi->motion_mode == NCOBMC_ADAPT_WEIGHT) { + ADAPT_OVERLAP_BLOCK ao_block = + adapt_overlap_block_lookup[mbmi->sb_type]; + ++counts->ncobmc_mode[ao_block][mbmi->ncobmc_mode[0]]; + if (mi_size_wide[mbmi->sb_type] != mi_size_high[mbmi->sb_type]) { + ++counts->ncobmc_mode[ao_block][mbmi->ncobmc_mode[1]]; + } + } +#endif + #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION #if CONFIG_EXT_INTER - if (cm->reference_mode != SINGLE_REFERENCE && + if ( +#if CONFIG_COMPOUND_SINGLEREF + is_inter_anyref_comp_mode(mbmi->mode) +#else // !CONFIG_COMPOUND_SINGLEREF + cm->reference_mode != SINGLE_REFERENCE && is_inter_compound_mode(mbmi->mode) +#endif // CONFIG_COMPOUND_SINGLEREF #if CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION && mbmi->motion_mode == SIMPLE_TRANSLATION #endif // CONFIG_MOTION_VAR || CONFIG_WARPED_MOTION @@ -1683,6 +1808,12 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row, if (has_second_ref(mbmi)) { mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]]; ++counts->inter_compound_mode[mode_ctx][INTER_COMPOUND_OFFSET(mode)]; +#if CONFIG_COMPOUND_SINGLEREF + } else if (is_inter_singleref_comp_mode(mode)) { + mode_ctx = mbmi_ext->compound_mode_context[mbmi->ref_frame[0]]; + ++counts->inter_singleref_comp_mode[mode_ctx] + [INTER_SINGLEREF_COMP_OFFSET(mode)]; +#endif // CONFIG_COMPOUND_SINGLEREF } else { #endif // CONFIG_EXT_INTER mode_ctx = av1_mode_context_analyzer(mbmi_ext->mode_context, @@ -1693,10 +1824,15 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td, int mi_row, #endif // CONFIG_EXT_INTER #if CONFIG_EXT_INTER +#if CONFIG_COMPOUND_SINGLEREF + if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV || + mbmi->mode == SR_NEW_NEWMV) { +#else // !CONFIG_COMPOUND_SINGLEREF if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) { -#else +#endif // CONFIG_COMPOUND_SINGLEREF +#else // !CONFIG_EXT_INTER if (mbmi->mode == NEWMV) { -#endif +#endif // CONFIG_EXT_INTER uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame); int idx; @@ -1871,10 +2007,16 @@ static void encode_b(const AV1_COMP *const cpi, const TileInfo *const tile, update_state(cpi, td, ctx, mi_row, mi_col, bsize, dry_run); #if CONFIG_MOTION_VAR && CONFIG_NCOBMC mbmi = &xd->mi[0]->mbmi; +#if CONFIG_WARPED_MOTION + set_ref_ptrs(&cpi->common, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); +#endif const MOTION_MODE motion_allowed = motion_mode_allowed( -#if CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION +#if CONFIG_GLOBAL_MOTION 0, xd->global_motion, -#endif // CONFIG_GLOBAL_MOTION && SEPARATE_GLOBAL_MOTION +#endif // CONFIG_GLOBAL_MOTION +#if CONFIG_WARPED_MOTION + xd, +#endif xd->mi[0]); check_ncobmc = is_inter_block(mbmi) && motion_allowed >= OBMC_CAUSAL; if (!dry_run && check_ncobmc) { @@ -1922,6 +2064,8 @@ static void encode_sb(const AV1_COMP *const cpi, ThreadData *td, const BLOCK_SIZE subsize = get_subsize(bsize, partition); #if CONFIG_EXT_PARTITION_TYPES const BLOCK_SIZE bsize2 = get_subsize(bsize, PARTITION_SPLIT); + int quarter_step = mi_size_wide[bsize] / 4; + int i; #endif #if CONFIG_CB4X4 @@ -1933,6 +2077,11 @@ static void encode_sb(const AV1_COMP *const cpi, ThreadData *td, if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; +#if CONFIG_SPEED_REFS + // First scanning pass of an SB is dry run only. + if (cpi->sb_scanning_pass_idx == 0) assert(dry_run == DRY_RUN_NORMAL); +#endif // CONFIG_SPEED_REFS + if (!dry_run && ctx >= 0) td->counts->partition[ctx][partition]++; #if CONFIG_SUPERTX @@ -2115,6 +2264,24 @@ static void encode_sb(const AV1_COMP *const cpi, ThreadData *td, encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col + hbs, dry_run, bsize2, partition, &pc_tree->verticalb[2], rate); break; + case PARTITION_HORZ_4: + for (i = 0; i < 4; ++i) { + int this_mi_row = mi_row + i * quarter_step; + if (i > 0 && this_mi_row >= cm->mi_rows) break; + + encode_b(cpi, tile, td, tp, this_mi_row, mi_col, dry_run, subsize, + partition, &pc_tree->horizontal4[i], rate); + } + break; + case PARTITION_VERT_4: + for (i = 0; i < 4; ++i) { + int this_mi_col = mi_col + i * quarter_step; + if (i > 0 && this_mi_col >= cm->mi_cols) break; + + encode_b(cpi, tile, td, tp, mi_row, this_mi_col, dry_run, subsize, + partition, &pc_tree->vertical4[i], rate); + } + break; #endif // CONFIG_EXT_PARTITION_TYPES default: assert(0 && "Invalid partition type."); break; } @@ -2302,8 +2469,7 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td, if (none_rdc.rate < INT_MAX) { none_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; - none_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, none_rdc.rate, none_rdc.dist); + none_rdc.rdcost = RDCOST(x->rdmult, none_rdc.rate, none_rdc.dist); #if CONFIG_SUPERTX none_rate_nocoef += cpi->partition_cost[pl][PARTITION_NONE]; #endif @@ -2473,7 +2639,9 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td, case PARTITION_VERT_A: case PARTITION_VERT_B: case PARTITION_HORZ_A: - case PARTITION_HORZ_B: assert(0 && "Cannot handle extended partiton types"); + case PARTITION_HORZ_B: + case PARTITION_HORZ_4: + case PARTITION_VERT_4: assert(0 && "Cannot handle extended partiton types"); #endif // CONFIG_EXT_PARTITION_TYPES default: assert(0); break; } @@ -2481,7 +2649,7 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td, if (last_part_rdc.rate < INT_MAX) { last_part_rdc.rate += cpi->partition_cost[pl][partition]; last_part_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, last_part_rdc.rate, last_part_rdc.dist); + RDCOST(x->rdmult, last_part_rdc.rate, last_part_rdc.dist); #if CONFIG_SUPERTX last_part_rate_nocoef += cpi->partition_cost[pl][partition]; #endif @@ -2565,8 +2733,7 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td, } if (chosen_rdc.rate < INT_MAX) { chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT]; - chosen_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, chosen_rdc.rate, chosen_rdc.dist); + chosen_rdc.rdcost = RDCOST(x->rdmult, chosen_rdc.rate, chosen_rdc.dist); #if CONFIG_SUPERTX chosen_rate_nocoef += cpi->partition_cost[pl][PARTITION_NONE]; #endif @@ -2624,8 +2791,8 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td, } /* clang-format off */ -static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = { -#if CONFIG_CB4X4 +static const BLOCK_SIZE min_partition_size[BLOCK_SIZES_ALL] = { +#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8 BLOCK_2X2, BLOCK_2X2, BLOCK_2X2, // 2x2, 2x4, 4x2 #endif BLOCK_4X4, // 4x4 @@ -2634,12 +2801,14 @@ static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = { BLOCK_8X8, BLOCK_8X8, BLOCK_16X16, // 16x32, 32x16, 32x32 BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, // 32x64, 64x32, 64x64 #if CONFIG_EXT_PARTITION - BLOCK_16X16, BLOCK_16X16, BLOCK_16X16 // 64x128, 128x64, 128x128 + BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, // 64x128, 128x64, 128x128 #endif // CONFIG_EXT_PARTITION + BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, // 4x16, 16x4, 8x32 + BLOCK_8X8 // 32x8 }; -static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = { -#if CONFIG_CB4X4 +static const BLOCK_SIZE max_partition_size[BLOCK_SIZES_ALL] = { +#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8 BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, // 2x2, 2x4, 4x2 #endif BLOCK_8X8, // 4x4 @@ -2648,13 +2817,15 @@ static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = { BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, // 16x32, 32x16, 32x32 BLOCK_LARGEST, BLOCK_LARGEST, BLOCK_LARGEST, // 32x64, 64x32, 64x64 #if CONFIG_EXT_PARTITION - BLOCK_LARGEST, BLOCK_LARGEST, BLOCK_LARGEST // 64x128, 128x64, 128x128 + BLOCK_LARGEST, BLOCK_LARGEST, BLOCK_LARGEST, // 64x128, 128x64, 128x128 #endif // CONFIG_EXT_PARTITION + BLOCK_16X16, BLOCK_16X16, BLOCK_32X32, // 4x16, 16x4, 8x32 + BLOCK_32X32 // 32x8 }; // Next square block size less or equal than current block size. -static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = { -#if CONFIG_CB4X4 +static const BLOCK_SIZE next_square_size[BLOCK_SIZES_ALL] = { +#if CONFIG_CHROMA_2X2 || CONFIG_CHROMA_SUB8X8 BLOCK_2X2, BLOCK_2X2, BLOCK_2X2, // 2x2, 2x4, 4x2 #endif BLOCK_4X4, // 4x4 @@ -2663,8 +2834,10 @@ static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = { BLOCK_16X16, BLOCK_16X16, BLOCK_32X32, // 16x32, 32x16, 32x32 BLOCK_32X32, BLOCK_32X32, BLOCK_64X64, // 32x64, 64x32, 64x64 #if CONFIG_EXT_PARTITION - BLOCK_64X64, BLOCK_64X64, BLOCK_128X128 // 64x128, 128x64, 128x128 + BLOCK_64X64, BLOCK_64X64, BLOCK_128X128, // 64x128, 128x64, 128x128 #endif // CONFIG_EXT_PARTITION + BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, // 4x16, 16x4, 8x32 + BLOCK_8X8 // 32x8 }; /* clang-format on */ @@ -3055,8 +3228,7 @@ static void rd_test_partition3( cm->fc->supertx_prob[partition_supertx_context_lookup[partition]] [supertx_size], 0); - sum_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); if (!check_intra_sb(cpi, tile_info, mi_row, mi_col, bsize, pc_tree)) { TX_TYPE best_tx = DCT_DCT; @@ -3071,8 +3243,7 @@ static void rd_test_partition3( cm->fc->supertx_prob[partition_supertx_context_lookup[partition]] [supertx_size], 1); - tmp_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, tmp_rdc.rate, tmp_rdc.dist); + tmp_rdc.rdcost = RDCOST(x->rdmult, tmp_rdc.rate, tmp_rdc.dist); if (tmp_rdc.rdcost < sum_rdc.rdcost) { sum_rdc = tmp_rdc; update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, best_tx, @@ -3091,8 +3262,7 @@ static void rd_test_partition3( #endif bsize); sum_rdc.rate += cpi->partition_cost[pl][partition]; - sum_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); #if CONFIG_SUPERTX sum_rate_nocoef += cpi->partition_cost[pl][partition]; #endif @@ -3161,7 +3331,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, #if CONFIG_SUPERTX int this_rate_nocoef, sum_rate_nocoef = 0, best_rate_nocoef = INT_MAX; int abort_flag; - const int supertx_allowed = !frame_is_intra_only(cm) && + const int supertx_allowed = !frame_is_intra_only(cm) && bsize >= BLOCK_8X8 && bsize <= MAX_SUPERTX_BLOCK_SIZE && !xd->lossless[0]; #endif // CONFIG_SUPERTX @@ -3341,6 +3511,17 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, } #endif +#if CONFIG_SPEED_REFS + if (cpi->sb_scanning_pass_idx == 0) { + // NOTE: For the 1st pass of scanning, check all the subblocks of equal size + // only. + partition_none_allowed = (bsize == MIN_SPEED_REFS_BLKSIZE); + partition_horz_allowed = 0; + partition_vert_allowed = 0; + do_square_split = (bsize > MIN_SPEED_REFS_BLKSIZE); + } +#endif // CONFIG_SPEED_REFS + // PARTITION_NONE if (partition_none_allowed) { rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, @@ -3354,8 +3535,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, if (this_rdc.rate != INT_MAX) { if (bsize_at_least_8x8) { this_rdc.rate += partition_cost[PARTITION_NONE]; - this_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist); + this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist); #if CONFIG_SUPERTX this_rate_nocoef += partition_cost[PARTITION_NONE]; #endif @@ -3494,8 +3674,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, cm->fc->supertx_prob[partition_supertx_context_lookup [PARTITION_SPLIT]][supertx_size], 0); - sum_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); if (is_inter_mode(pc_tree->leaf_split[0]->mic.mbmi.mode)) { TX_TYPE best_tx = DCT_DCT; @@ -3512,8 +3691,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, cm->fc->supertx_prob[partition_supertx_context_lookup [PARTITION_SPLIT]][supertx_size], 1); - tmp_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, tmp_rdc.rate, tmp_rdc.dist); + tmp_rdc.rdcost = RDCOST(x->rdmult, tmp_rdc.rate, tmp_rdc.dist); if (tmp_rdc.rdcost < sum_rdc.rdcost) { sum_rdc = tmp_rdc; update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, best_tx, @@ -3551,6 +3729,11 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, &this_rdc, best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[idx]); #endif // CONFIG_SUPERTX +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 + if (bsize == BLOCK_8X8 && this_rdc.rate != INT_MAX) { + assert(this_rdc.dist_y < INT64_MAX); + } +#endif if (this_rdc.rate == INT_MAX) { sum_rdc.rdcost = INT64_MAX; #if CONFIG_SUPERTX @@ -3564,28 +3747,40 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, #if CONFIG_SUPERTX sum_rate_nocoef += this_rate_nocoef; #endif // CONFIG_SUPERTX -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 - sum_rdc.dist_y += this_rdc.dist_y; +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 + if (bsize == BLOCK_8X8) { + assert(this_rdc.dist_y < INT64_MAX); + sum_rdc.dist_y += this_rdc.dist_y; + } #endif } } reached_last_index = (idx == 4); -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 if (reached_last_index && sum_rdc.rdcost != INT64_MAX && bsize == BLOCK_8X8) { - int use_activity_masking = 0; - int64_t daala_dist; + int64_t dist_8x8; const int src_stride = x->plane[0].src.stride; - daala_dist = av1_daala_dist(x->plane[0].src.buf - 4 * src_stride - 4, - src_stride, x->decoded_8x8, 8, 8, 8, 1, - use_activity_masking, x->qindex) - << 4; - sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + daala_dist; - sum_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + uint8_t *decoded_8x8; + +#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + decoded_8x8 = CONVERT_TO_BYTEPTR(x->decoded_8x8); + else +#endif + decoded_8x8 = (uint8_t *)x->decoded_8x8; + + dist_8x8 = + av1_dist_8x8(cpi, xd, x->plane[0].src.buf - 4 * src_stride - 4, + src_stride, decoded_8x8, 8, BLOCK_8X8, 8, 8, 8, 8, + x->qindex) + << 4; + assert(sum_rdc.dist_y < INT64_MAX); + sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + dist_8x8; + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); } -#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4 +#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4 #if CONFIG_SUPERTX if (supertx_allowed && sum_rdc.rdcost < INT64_MAX && reached_last_index) { @@ -3598,8 +3793,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, cm->fc->supertx_prob[partition_supertx_context_lookup [PARTITION_SPLIT]][supertx_size], 0); - sum_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); if (!check_intra_sb(cpi, tile_info, mi_row, mi_col, bsize, pc_tree)) { TX_TYPE best_tx = DCT_DCT; @@ -3616,8 +3810,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, cm->fc->supertx_prob[partition_supertx_context_lookup [PARTITION_SPLIT]][supertx_size], 1); - tmp_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, tmp_rdc.rate, tmp_rdc.dist); + tmp_rdc.rdcost = RDCOST(x->rdmult, tmp_rdc.rate, tmp_rdc.dist); if (tmp_rdc.rdcost < sum_rdc.rdcost) { sum_rdc = tmp_rdc; update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, best_tx, @@ -3632,7 +3825,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, if (reached_last_index && sum_rdc.rdcost < best_rdc.rdcost) { sum_rdc.rate += partition_cost[PARTITION_SPLIT]; - sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); #if CONFIG_SUPERTX sum_rate_nocoef += partition_cost[PARTITION_SPLIT]; #endif // CONFIG_SUPERTX @@ -3725,14 +3918,14 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, best_rdc.rdcost - sum_rdc.rdcost); #endif // CONFIG_SUPERTX -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 if (this_rdc.rate != INT_MAX && bsize == BLOCK_8X8) { update_state(cpi, td, &pc_tree->horizontal[1], mi_row + mi_step, mi_col, subsize, DRY_RUN_NORMAL); encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row + mi_step, mi_col, subsize, NULL); } -#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4 +#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4 if (this_rdc.rate == INT_MAX) { sum_rdc.rdcost = INT64_MAX; @@ -3746,24 +3939,31 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, #if CONFIG_SUPERTX sum_rate_nocoef += this_rate_nocoef; #endif // CONFIG_SUPERTX -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 sum_rdc.dist_y += this_rdc.dist_y; #endif } -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 if (sum_rdc.rdcost != INT64_MAX && bsize == BLOCK_8X8) { - int use_activity_masking = 0; - int64_t daala_dist; + int64_t dist_8x8; const int src_stride = x->plane[0].src.stride; - daala_dist = av1_daala_dist(x->plane[0].src.buf - 4 * src_stride, - src_stride, x->decoded_8x8, 8, 8, 8, 1, - use_activity_masking, x->qindex) - << 4; - sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + daala_dist; - sum_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + uint8_t *decoded_8x8; + +#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + decoded_8x8 = CONVERT_TO_BYTEPTR(x->decoded_8x8); + else +#endif + decoded_8x8 = (uint8_t *)x->decoded_8x8; + + dist_8x8 = av1_dist_8x8(cpi, xd, x->plane[0].src.buf - 4 * src_stride, + src_stride, decoded_8x8, 8, BLOCK_8X8, 8, 8, 8, + 8, x->qindex) + << 4; + sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + dist_8x8; + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); } -#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4 +#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4 } #if CONFIG_SUPERTX @@ -3777,7 +3977,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, cm->fc->supertx_prob[partition_supertx_context_lookup[PARTITION_HORZ]] [supertx_size], 0); - sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); if (!check_intra_sb(cpi, tile_info, mi_row, mi_col, bsize, pc_tree)) { TX_TYPE best_tx = DCT_DCT; @@ -3795,8 +3995,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, ->supertx_prob[partition_supertx_context_lookup[PARTITION_HORZ]] [supertx_size], 1); - tmp_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, tmp_rdc.rate, tmp_rdc.dist); + tmp_rdc.rdcost = RDCOST(x->rdmult, tmp_rdc.rate, tmp_rdc.dist); if (tmp_rdc.rdcost < sum_rdc.rdcost) { sum_rdc = tmp_rdc; update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, best_tx, @@ -3810,7 +4009,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, if (sum_rdc.rdcost < best_rdc.rdcost) { sum_rdc.rate += partition_cost[PARTITION_HORZ]; - sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); #if CONFIG_SUPERTX sum_rate_nocoef += partition_cost[PARTITION_HORZ]; #endif // CONFIG_SUPERTX @@ -3899,14 +4098,14 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, best_rdc.rdcost - sum_rdc.rdcost); #endif // CONFIG_SUPERTX -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 if (this_rdc.rate != INT_MAX && bsize == BLOCK_8X8) { update_state(cpi, td, &pc_tree->vertical[1], mi_row, mi_col + mi_step, subsize, DRY_RUN_NORMAL); encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, mi_col + mi_step, subsize, NULL); } -#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4 +#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4 if (this_rdc.rate == INT_MAX) { sum_rdc.rdcost = INT64_MAX; @@ -3920,24 +4119,31 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, #if CONFIG_SUPERTX sum_rate_nocoef += this_rate_nocoef; #endif // CONFIG_SUPERTX -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 sum_rdc.dist_y += this_rdc.dist_y; #endif } -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 if (sum_rdc.rdcost != INT64_MAX && bsize == BLOCK_8X8) { - int use_activity_masking = 0; - int64_t daala_dist; + int64_t dist_8x8; const int src_stride = x->plane[0].src.stride; - daala_dist = - av1_daala_dist(x->plane[0].src.buf - 4, src_stride, x->decoded_8x8, - 8, 8, 8, 1, use_activity_masking, x->qindex) + uint8_t *decoded_8x8; + +#if CONFIG_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) + decoded_8x8 = CONVERT_TO_BYTEPTR(x->decoded_8x8); + else +#endif + decoded_8x8 = (uint8_t *)x->decoded_8x8; + + dist_8x8 = + av1_dist_8x8(cpi, xd, x->plane[0].src.buf - 4, src_stride, + decoded_8x8, 8, BLOCK_8X8, 8, 8, 8, 8, x->qindex) << 4; - sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + daala_dist; - sum_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + sum_rdc.dist = sum_rdc.dist - sum_rdc.dist_y + dist_8x8; + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); } -#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4 +#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4 } #if CONFIG_SUPERTX if (supertx_allowed && sum_rdc.rdcost < INT64_MAX && !abort_flag) { @@ -3950,7 +4156,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, cm->fc->supertx_prob[partition_supertx_context_lookup[PARTITION_VERT]] [supertx_size], 0); - sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); if (!check_intra_sb(cpi, tile_info, mi_row, mi_col, bsize, pc_tree)) { TX_TYPE best_tx = DCT_DCT; @@ -3968,8 +4174,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, ->supertx_prob[partition_supertx_context_lookup[PARTITION_VERT]] [supertx_size], 1); - tmp_rdc.rdcost = - RDCOST(x->rdmult, x->rddiv, tmp_rdc.rate, tmp_rdc.dist); + tmp_rdc.rdcost = RDCOST(x->rdmult, tmp_rdc.rate, tmp_rdc.dist); if (tmp_rdc.rdcost < sum_rdc.rdcost) { sum_rdc = tmp_rdc; update_supertx_param_sb(cpi, td, mi_row, mi_col, bsize, best_tx, @@ -3983,7 +4188,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, if (sum_rdc.rdcost < best_rdc.rdcost) { sum_rdc.rate += partition_cost[PARTITION_VERT]; - sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); #if CONFIG_SUPERTX sum_rate_nocoef += partition_cost[PARTITION_VERT]; #endif // CONFIG_SUPERTX @@ -4060,14 +4265,139 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, bsize2, mi_row + mi_step, mi_col + mi_step, bsize2); restore_context(x, &x_ctx, mi_row, mi_col, bsize); } + + // PARTITION_HORZ_4 + // TODO(david.barker): For this and PARTITION_VERT_4, + // * Add support for BLOCK_16X16 once we support 2x8 and 8x2 blocks for the + // chroma plane + // * Add support for supertx + if (bsize == BLOCK_32X32 && partition_horz_allowed && !force_horz_split && + (do_rectangular_split || av1_active_h_edge(cpi, mi_row, mi_step))) { + int i; + const int quarter_step = mi_size_high[bsize] / 4; + PICK_MODE_CONTEXT *ctx_prev = ctx_none; + + subsize = get_subsize(bsize, PARTITION_HORZ_4); + av1_zero(sum_rdc); + + for (i = 0; i < 4; ++i) { + int this_mi_row = mi_row + i * quarter_step; + + if (i > 0 && this_mi_row >= cm->mi_rows) break; + + if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_prev); + + ctx_prev = &pc_tree->horizontal4[i]; + + rd_pick_sb_modes(cpi, tile_data, x, this_mi_row, mi_col, &this_rdc, + PARTITION_HORZ_4, subsize, ctx_prev, + best_rdc.rdcost - sum_rdc.rdcost); + + if (this_rdc.rate == INT_MAX) { + sum_rdc.rdcost = INT64_MAX; + break; + } else { + sum_rdc.rate += this_rdc.rate; + sum_rdc.dist += this_rdc.dist; + sum_rdc.rdcost += this_rdc.rdcost; + } + + if (sum_rdc.rdcost >= best_rdc.rdcost) break; + + if (i < 3) { + update_state(cpi, td, ctx_prev, this_mi_row, mi_col, subsize, 1); + encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, this_mi_row, mi_col, + subsize, NULL); + } + } + + if (sum_rdc.rdcost < best_rdc.rdcost) { + sum_rdc.rate += partition_cost[PARTITION_HORZ_4]; + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); + if (sum_rdc.rdcost < best_rdc.rdcost) { + best_rdc = sum_rdc; + pc_tree->partitioning = PARTITION_HORZ_4; + } + } +#if !CONFIG_PVQ + restore_context(x, &x_ctx, mi_row, mi_col, bsize); +#else + restore_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize); +#endif + } + // PARTITION_VERT_4 + if (bsize == BLOCK_32X32 && partition_vert_allowed && !force_vert_split && + (do_rectangular_split || av1_active_v_edge(cpi, mi_row, mi_step))) { + int i; + const int quarter_step = mi_size_wide[bsize] / 4; + PICK_MODE_CONTEXT *ctx_prev = ctx_none; + + subsize = get_subsize(bsize, PARTITION_VERT_4); + av1_zero(sum_rdc); + + for (i = 0; i < 4; ++i) { + int this_mi_col = mi_col + i * quarter_step; + + if (i > 0 && this_mi_col >= cm->mi_cols) break; + + if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_prev); + + ctx_prev = &pc_tree->vertical4[i]; + + rd_pick_sb_modes(cpi, tile_data, x, mi_row, this_mi_col, &this_rdc, + PARTITION_VERT_4, subsize, ctx_prev, + best_rdc.rdcost - sum_rdc.rdcost); + + if (this_rdc.rate == INT_MAX) { + sum_rdc.rdcost = INT64_MAX; + } else { + sum_rdc.rate += this_rdc.rate; + sum_rdc.dist += this_rdc.dist; + sum_rdc.rdcost += this_rdc.rdcost; + } + + if (sum_rdc.rdcost >= best_rdc.rdcost) break; + + if (i < 3) { + update_state(cpi, td, ctx_prev, mi_row, this_mi_col, subsize, 1); + encode_superblock(cpi, td, tp, DRY_RUN_NORMAL, mi_row, this_mi_col, + subsize, NULL); + } + } + + if (sum_rdc.rdcost < best_rdc.rdcost) { + sum_rdc.rate += partition_cost[PARTITION_VERT_4]; + sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); + if (sum_rdc.rdcost < best_rdc.rdcost) { + best_rdc = sum_rdc; + pc_tree->partitioning = PARTITION_VERT_4; + } + } +#if !CONFIG_PVQ + restore_context(x, &x_ctx, mi_row, mi_col, bsize); +#else + restore_context(x, &x_ctx, mi_row, mi_col, &pre_rdo_buf, bsize); +#endif + } #endif // CONFIG_EXT_PARTITION_TYPES +#if CONFIG_SPEED_REFS + // First scanning is done. + if (cpi->sb_scanning_pass_idx == 0 && bsize == cm->sb_size) return; +#endif // CONFIG_SPEED_REFS + // TODO(jbb): This code added so that we avoid static analysis // warning related to the fact that best_rd isn't used after this // point. This code should be refactored so that the duplicate // checks occur in some sub function and thus are used... (void)best_rd; *rd_cost = best_rdc; + +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 + if (bsize <= BLOCK_8X8 && rd_cost->rate != INT_MAX) { + assert(rd_cost->dist_y < INT64_MAX); + } +#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4 #if CONFIG_SUPERTX *rate_nocoef = best_rate_nocoef; #endif // CONFIG_SUPERTX @@ -4093,13 +4423,13 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, x->cfl_store_y = 0; #endif -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX && bsize == BLOCK_4X4 && pc_tree->index == 3) { encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize, pc_tree, NULL); } -#endif // CONFIG_DAALA_DIST && CONFIG_CB4X4 +#endif // CONFIG_DIST_8X8 && CONFIG_CB4X4 if (bsize == cm->sb_size) { #if !CONFIG_PVQ && !CONFIG_LV_MAP @@ -4112,6 +4442,22 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td, } } +#if CONFIG_SPEED_REFS +static void restore_mi(const AV1_COMP *const cpi, MACROBLOCK *const x, + int mi_row, int mi_col) { + const AV1_COMMON *cm = &cpi->common; + MACROBLOCKD *const xd = &x->e_mbd; + set_mode_info_offsets(cpi, x, xd, mi_row, mi_col); + int x_idx, y; + for (y = 0; y < mi_size_high[cm->sb_size]; y++) + for (x_idx = 0; x_idx < mi_size_wide[cm->sb_size]; x_idx++) + if (mi_col + x_idx < cm->mi_cols && mi_row + y < cm->mi_rows) { + memset(xd->mi + y * cm->mi_stride + x_idx, 0, sizeof(*xd->mi)); + memset(x->mbmi_ext + y * cm->mi_cols + x_idx, 0, sizeof(*x->mbmi_ext)); + } +} +#endif // CONFIG_SPEED_REFS + static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, int mi_row, TOKENEXTRA **tp) { @@ -4157,8 +4503,6 @@ static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td, MODE_INFO **mi = cm->mi_grid_visible + idx_str; PC_TREE *const pc_root = td->pc_root[cm->mib_size_log2 - MIN_MIB_SIZE_LOG2]; - av1_update_boundary_info(cm, tile_info, mi_row, mi_col); - if (sf->adaptive_pred_interp_filter) { for (i = 0; i < leaf_nodes; ++i) td->leaf_tree[i].pred_interp_filter = SWITCHABLE; @@ -4258,12 +4602,35 @@ static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td, rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col, &x->min_partition_size, &x->max_partition_size); } +#if CONFIG_SPEED_REFS + // NOTE: Two scanning passes for the current superblock - the first pass + // is only targeted to collect stats. + int m_search_count_backup = *(x->m_search_count_ptr); + for (int sb_pass_idx = 0; sb_pass_idx < 2; ++sb_pass_idx) { + cpi->sb_scanning_pass_idx = sb_pass_idx; + if (frame_is_intra_only(cm) && sb_pass_idx == 0) continue; + + rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, cm->sb_size, + &dummy_rdc, +#if CONFIG_SUPERTX + &dummy_rate_nocoef, +#endif // CONFIG_SUPERTX + INT64_MAX, pc_root); + if (sb_pass_idx == 0) { + av1_zero(x->pred_mv); + pc_root->index = 0; + restore_mi(cpi, x, mi_row, mi_col); + *(x->m_search_count_ptr) = m_search_count_backup; + } + } +#else // !CONFIG_SPEED_REFS rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, cm->sb_size, &dummy_rdc, #if CONFIG_SUPERTX &dummy_rate_nocoef, #endif // CONFIG_SUPERTX INT64_MAX, pc_root); +#endif // CONFIG_SPEED_REFS } } } @@ -4329,20 +4696,11 @@ static MV_REFERENCE_FRAME get_frame_type(const AV1_COMP *cpi) { return LAST_FRAME; } -static TX_MODE select_tx_mode(const AV1_COMP *cpi, MACROBLOCKD *const xd) { - int i, all_lossless = 1; - - if (cpi->common.seg.enabled) { - for (i = 0; i < MAX_SEGMENTS; ++i) { - if (!xd->lossless[i]) { - all_lossless = 0; - break; - } - } - } else { - all_lossless = xd->lossless[0]; - } - if (all_lossless) return ONLY_4X4; +static TX_MODE select_tx_mode(const AV1_COMP *cpi) { + if (cpi->common.all_lossless) return ONLY_4X4; +#if CONFIG_VAR_TX_NO_TX_MODE + return TX_MODE_SELECT; +#else if (cpi->sf.tx_size_search_method == USE_LARGESTALL) return ALLOW_32X32 + CONFIG_TX64X64; else if (cpi->sf.tx_size_search_method == USE_FULL_RD || @@ -4350,6 +4708,7 @@ static TX_MODE select_tx_mode(const AV1_COMP *cpi, MACROBLOCKD *const xd) { return TX_MODE_SELECT; else return cpi->common.tx_mode; +#endif // CONFIG_VAR_TX_NO_TX_MODE } void av1_init_tile_data(AV1_COMP *cpi) { @@ -4372,7 +4731,7 @@ void av1_init_tile_data(AV1_COMP *cpi) { TileDataEnc *const tile_data = &cpi->tile_data[tile_row * tile_cols + tile_col]; int i, j; - for (i = 0; i < BLOCK_SIZES; ++i) { + for (i = 0; i < BLOCK_SIZES_ALL; ++i) { for (j = 0; j < MAX_MODES; ++j) { tile_data->thresh_freq_fact[i][j] = 32; tile_data->mode_map[i][j] = j; @@ -4415,12 +4774,8 @@ void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row, int mi_row; #if CONFIG_DEPENDENT_HORZTILES -#if CONFIG_TILE_GROUPS if ((!cm->dependent_horz_tiles) || (tile_row == 0) || tile_info->tg_horz_boundary) { -#else - if ((!cm->dependent_horz_tiles) || (tile_row == 0)) { -#endif av1_zero_above_context(cm, tile_info->mi_col_start, tile_info->mi_col_end); } #else @@ -4504,22 +4859,21 @@ void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row, #endif #endif // #if CONFIG_PVQ -#if CONFIG_EC_ADAPT this_tile->tctx = *cm->fc; td->mb.e_mbd.tile_ctx = &this_tile->tctx; -#endif // #if CONFIG_EC_ADAPT #if CONFIG_CFL MACROBLOCKD *const xd = &td->mb.e_mbd; xd->cfl = &this_tile->cfl; - cfl_init(xd->cfl, cm, xd->plane[AOM_PLANE_U].subsampling_x, - xd->plane[AOM_PLANE_U].subsampling_y); + cfl_init(xd->cfl, cm); #endif #if CONFIG_PVQ td->mb.daala_enc.state.adapt = &this_tile->tctx.pvq_context; #endif // CONFIG_PVQ + av1_setup_across_tile_boundary_info(cm, tile_info); + for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end; mi_row += cm->mib_size) { encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok); @@ -4656,6 +5010,36 @@ static int do_gm_search_logic(SPEED_FEATURES *const sf, int num_refs_using_gm, } #endif // CONFIG_GLOBAL_MOTION +#if CONFIG_PALETTE +// Estimate if the source frame is screen content, based on the portion of +// blocks that have no more than 4 (experimentally selected) luma colors. +static int is_screen_content(const uint8_t *src, +#if CONFIG_HIGHBITDEPTH + int use_hbd, int bd, +#endif // CONFIG_HIGHBITDEPTH + int stride, int width, int height) { + assert(src != NULL); + int counts = 0; + const int blk_w = 16; + const int blk_h = 16; + const int limit = 4; + for (int r = 0; r + blk_h <= height; r += blk_h) { + for (int c = 0; c + blk_w <= width; c += blk_w) { + const int n_colors = +#if CONFIG_HIGHBITDEPTH + use_hbd ? av1_count_colors_highbd(src + r * stride + c, stride, blk_w, + blk_h, bd) + : +#endif // CONFIG_HIGHBITDEPTH + av1_count_colors(src + r * stride + c, stride, blk_w, blk_h); + if (n_colors > 1 && n_colors <= limit) counts++; + } + } + // The threshold is 10%. + return counts * blk_h * blk_w * 10 > width * height; +} +#endif // CONFIG_PALETTE + static void encode_frame_internal(AV1_COMP *cpi) { ThreadData *const td = &cpi->td; MACROBLOCK *const x = &td->mb; @@ -4682,6 +5066,23 @@ static void encode_frame_internal(AV1_COMP *cpi) { av1_zero(rdc->coef_counts); av1_zero(rdc->comp_pred_diff); +#if CONFIG_PALETTE || CONFIG_INTRABC + if (frame_is_intra_only(cm)) { +#if CONFIG_PALETTE + cm->allow_screen_content_tools = + cpi->oxcf.content == AOM_CONTENT_SCREEN || + is_screen_content(cpi->source->y_buffer, +#if CONFIG_HIGHBITDEPTH + cpi->source->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd, +#endif // CONFIG_HIGHBITDEPTH + cpi->source->y_stride, cpi->source->y_width, + cpi->source->y_height); +#else + cm->allow_screen_content_tools = cpi->oxcf.content == AOM_CONTENT_SCREEN; +#endif // CONFIG_PALETTE + } +#endif // CONFIG_PALETTE || CONFIG_INTRABC + #if CONFIG_GLOBAL_MOTION av1_zero(rdc->global_motion_used); av1_zero(cpi->gmparams_cost); @@ -4709,6 +5110,8 @@ static void encode_frame_internal(AV1_COMP *cpi) { memcpy(&cm->global_motion[frame], &cm->global_motion[pframe], sizeof(WarpedMotionParams)); } else if (ref_buf[frame] && + ref_buf[frame]->y_crop_width == cpi->source->y_crop_width && + ref_buf[frame]->y_crop_height == cpi->source->y_crop_height && do_gm_search_logic(&cpi->sf, num_refs_using_gm, frame)) { TransformationType model; const int64_t ref_frame_error = av1_frame_error( @@ -4716,8 +5119,8 @@ static void encode_frame_internal(AV1_COMP *cpi) { xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd, #endif // CONFIG_HIGHBITDEPTH ref_buf[frame]->y_buffer, ref_buf[frame]->y_stride, - cpi->source->y_buffer, 0, 0, cpi->source->y_width, - cpi->source->y_height, cpi->source->y_stride); + cpi->source->y_buffer, cpi->source->y_width, cpi->source->y_height, + cpi->source->y_stride); if (ref_frame_error == 0) continue; @@ -4752,7 +5155,8 @@ static void encode_frame_internal(AV1_COMP *cpi) { ref_buf[frame]->y_buffer, ref_buf[frame]->y_width, ref_buf[frame]->y_height, ref_buf[frame]->y_stride, cpi->source->y_buffer, cpi->source->y_width, - cpi->source->y_height, cpi->source->y_stride, 3); + cpi->source->y_height, cpi->source->y_stride, 5, + best_warp_error); if (warp_error < best_warp_error) { best_warp_error = warp_error; // Save the wm_params modified by refine_integerized_param() @@ -4812,10 +5216,10 @@ static void encode_frame_internal(AV1_COMP *cpi) { cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; xd->qindex[i] = qindex; } - + cm->all_lossless = all_lossless(cm, xd); if (!cm->seg.enabled && xd->lossless[0]) x->optimize = 0; - cm->tx_mode = select_tx_mode(cpi, xd); + cm->tx_mode = select_tx_mode(cpi); #if CONFIG_DELTA_Q // Fix delta q resolution for the moment @@ -4859,18 +5263,32 @@ static void encode_frame_internal(AV1_COMP *cpi) { #if CONFIG_TEMPMV_SIGNALING if (cm->prev_frame) { - cm->use_prev_frame_mvs &= !cm->error_resilient_mode && - cm->width == cm->prev_frame->buf.y_width && - cm->height == cm->prev_frame->buf.y_height && - !cm->intra_only && !cm->prev_frame->intra_only; + cm->use_prev_frame_mvs &= + !cm->error_resilient_mode && +#if CONFIG_FRAME_SUPERRES + cm->width == cm->last_width && cm->height == cm->last_height && +#else + cm->width == cm->prev_frame->buf.y_crop_width && + cm->height == cm->prev_frame->buf.y_crop_height && +#endif // CONFIG_FRAME_SUPERRES + !cm->intra_only && !cm->prev_frame->intra_only && cm->last_show_frame; } else { cm->use_prev_frame_mvs = 0; } #else - cm->use_prev_frame_mvs = !cm->error_resilient_mode && cm->prev_frame && - cm->width == cm->prev_frame->buf.y_crop_width && - cm->height == cm->prev_frame->buf.y_crop_height && - !cm->intra_only && cm->last_show_frame; + if (cm->prev_frame) { + cm->use_prev_frame_mvs = !cm->error_resilient_mode && +#if CONFIG_FRAME_SUPERRES + cm->width == cm->last_width && + cm->height == cm->last_height && +#else + cm->width == cm->prev_frame->buf.y_crop_width && + cm->height == cm->prev_frame->buf.y_crop_height && +#endif // CONFIG_FRAME_SUPERRES + !cm->intra_only && cm->last_show_frame; + } else { + cm->use_prev_frame_mvs = 0; + } #endif // CONFIG_TEMPMV_SIGNALING // Special case: set prev_mi to NULL when the previous mode info @@ -4894,6 +5312,8 @@ static void encode_frame_internal(AV1_COMP *cpi) { } #endif + av1_setup_frame_boundary_info(cm); + // If allowed, encoding tiles in parallel with one thread handling one tile. // TODO(geza.lore): The multi-threaded encoder is not safe with more than // 1 tile rows, as it uses the single above_context et al arrays from @@ -4921,7 +5341,11 @@ static void make_consistent_compound_tools(AV1_COMMON *cm) { cm->allow_interintra_compound = 0; #endif // CONFIG_INTERINTRA #if CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE +#if CONFIG_COMPOUND_SINGLEREF + if (frame_is_intra_only(cm)) +#else // !CONFIG_COMPOUND_SINGLEREF if (frame_is_intra_only(cm) || cm->reference_mode == SINGLE_REFERENCE) +#endif // CONFIG_COMPOUND_SINGLEREF cm->allow_masked_compound = 0; #endif // CONFIG_COMPOUND_SEGMENT || CONFIG_WEDGE } @@ -4942,14 +5366,14 @@ void av1_encode_frame(AV1_COMP *cpi) { // side behavior is where the ALT ref buffer has opposite sign bias to // the other two. if (!frame_is_intra_only(cm)) { -#if !CONFIG_ONE_SIDED_COMPOUND +#if !(CONFIG_ONE_SIDED_COMPOUND || CONFIG_EXT_COMP_REFS) if ((cm->ref_frame_sign_bias[ALTREF_FRAME] == cm->ref_frame_sign_bias[GOLDEN_FRAME]) || (cm->ref_frame_sign_bias[ALTREF_FRAME] == cm->ref_frame_sign_bias[LAST_FRAME])) { cpi->allow_comp_inter_inter = 0; } else { -#endif +#endif // !(CONFIG_ONE_SIDED_COMPOUND || CONFIG_EXT_COMP_REFS) cpi->allow_comp_inter_inter = 1; #if CONFIG_EXT_REFS cm->comp_fwd_ref[0] = LAST_FRAME; @@ -4962,10 +5386,11 @@ void av1_encode_frame(AV1_COMP *cpi) { cm->comp_fixed_ref = ALTREF_FRAME; cm->comp_var_ref[0] = LAST_FRAME; cm->comp_var_ref[1] = GOLDEN_FRAME; -#endif // CONFIG_EXT_REFS -#if !CONFIG_ONE_SIDED_COMPOUND // Normative in encoder +#endif // CONFIG_EXT_REFS +#if !(CONFIG_ONE_SIDED_COMPOUND || \ + CONFIG_EXT_COMP_REFS) // Normative in encoder } -#endif +#endif // !(CONFIG_ONE_SIDED_COMPOUND || CONFIG_EXT_COMP_REFS) } else { cpi->allow_comp_inter_inter = 0; } @@ -4998,7 +5423,12 @@ void av1_encode_frame(AV1_COMP *cpi) { else cm->reference_mode = REFERENCE_MODE_SELECT; #else +#if CONFIG_BGSPRITE + (void)is_alt_ref; + if (!cpi->allow_comp_inter_inter) +#else if (is_alt_ref || !cpi->allow_comp_inter_inter) +#endif // CONFIG_BGSPRITE cm->reference_mode = SINGLE_REFERENCE; else if (mode_thrs[COMPOUND_REFERENCE] > mode_thrs[SINGLE_REFERENCE] && mode_thrs[COMPOUND_REFERENCE] > mode_thrs[REFERENCE_MODE_SELECT] && @@ -5017,25 +5447,23 @@ void av1_encode_frame(AV1_COMP *cpi) { #if CONFIG_EXT_INTER make_consistent_compound_tools(cm); #endif // CONFIG_EXT_INTER + + rdc->single_ref_used_flag = 0; + rdc->compound_ref_used_flag = 0; + encode_frame_internal(cpi); for (i = 0; i < REFERENCE_MODES; ++i) mode_thrs[i] = (mode_thrs[i] + rdc->comp_pred_diff[i] / cm->MBs) / 2; if (cm->reference_mode == REFERENCE_MODE_SELECT) { - int single_count_zero = 0; - int comp_count_zero = 0; - - for (i = 0; i < COMP_INTER_CONTEXTS; i++) { - single_count_zero += counts->comp_inter[i][0]; - comp_count_zero += counts->comp_inter[i][1]; - } - - if (comp_count_zero == 0) { + // Use a flag that includes 4x4 blocks + if (rdc->compound_ref_used_flag == 0) { cm->reference_mode = SINGLE_REFERENCE; av1_zero(counts->comp_inter); #if !CONFIG_REF_ADAPT - } else if (single_count_zero == 0) { + // Use a flag that includes 4x4 blocks + } else if (rdc->single_ref_used_flag == 0) { cm->reference_mode = COMPOUND_REFERENCE; av1_zero(counts->comp_inter); #endif // !CONFIG_REF_ADAPT @@ -5046,10 +5474,15 @@ void av1_encode_frame(AV1_COMP *cpi) { #endif // CONFIG_EXT_INTER #if CONFIG_VAR_TX +#if CONFIG_RECT_TX_EXT + if (cm->tx_mode == TX_MODE_SELECT && cpi->td.mb.txb_split_count == 0 && + counts->quarter_tx_size[1] == 0) +#else if (cm->tx_mode == TX_MODE_SELECT && cpi->td.mb.txb_split_count == 0) +#endif cm->tx_mode = ALLOW_32X32 + CONFIG_TX64X64; #else -#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT +#if CONFIG_RECT_TX_EXT && CONFIG_EXT_TX if (cm->tx_mode == TX_MODE_SELECT && counts->quarter_tx_size[1] == 0) { #else if (cm->tx_mode == TX_MODE_SELECT) { @@ -5232,12 +5665,20 @@ static void sum_intra_stats(FRAME_COUNTS *counts, MACROBLOCKD *xd, const MODE_INFO *left_mi, const int intraonly, const int mi_row, const int mi_col) { const MB_MODE_INFO *const mbmi = &mi->mbmi; +#if CONFIG_ENTROPY_STATS const PREDICTION_MODE y_mode = mbmi->mode; - const PREDICTION_MODE uv_mode = mbmi->uv_mode; + const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode; +#else // CONFIG_ENTROPY_STATS + (void)counts; + (void)above_mi; + (void)left_mi; + (void)intraonly; +#endif // CONFIG_ENTROPY_STATS const BLOCK_SIZE bsize = mbmi->sb_type; const int unify_bsize = CONFIG_CB4X4; if (bsize < BLOCK_8X8 && !unify_bsize) { +#if CONFIG_ENTROPY_STATS int idx, idy; const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; @@ -5253,7 +5694,9 @@ static void sum_intra_stats(FRAME_COUNTS *counts, MACROBLOCKD *xd, ++counts->y_mode[0][bmode]; } } +#endif // CONFIG_ENTROPY_STATS } else { +#if CONFIG_ENTROPY_STATS if (intraonly) { const PREDICTION_MODE above = av1_above_block_mode(mi, above_mi, 0); const PREDICTION_MODE left = av1_left_block_mode(mi, left_mi, 0); @@ -5261,6 +5704,7 @@ static void sum_intra_stats(FRAME_COUNTS *counts, MACROBLOCKD *xd, } else { ++counts->y_mode[size_group_lookup[bsize]][y_mode]; } +#endif // CONFIG_ENTROPY_STATS #if CONFIG_FILTER_INTRA if (mbmi->mode == DC_PRED #if CONFIG_PALETTE @@ -5271,7 +5715,7 @@ static void sum_intra_stats(FRAME_COUNTS *counts, MACROBLOCKD *xd, mbmi->filter_intra_mode_info.use_filter_intra_mode[0]; ++counts->filter_intra[0][use_filter_intra_mode]; } - if (mbmi->uv_mode == DC_PRED + if (mbmi->uv_mode == UV_DC_PRED #if CONFIG_CB4X4 && is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x, @@ -5306,7 +5750,9 @@ static void sum_intra_stats(FRAME_COUNTS *counts, MACROBLOCKD *xd, (void)mi_col; (void)xd; #endif +#if CONFIG_ENTROPY_STATS ++counts->uv_mode[y_mode][uv_mode]; +#endif // CONFIG_ENTROPY_STATS } #if CONFIG_VAR_TX @@ -5325,9 +5771,17 @@ static void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd, if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return; +#if CONFIG_RECT_TX_EXT + if (tx_size == plane_tx_size || + mbmi->tx_size == quarter_txsize_lookup[mbmi->sb_type]) { +#else if (tx_size == plane_tx_size) { +#endif ++counts->txfm_partition[ctx][0]; - mbmi->tx_size = tx_size; +#if CONFIG_RECT_TX_EXT + if (tx_size == plane_tx_size) +#endif + mbmi->tx_size = tx_size; txfm_partition_update(xd->above_txfm_context + blk_col, xd->left_txfm_context + blk_row, tx_size, tx_size); } else { @@ -5438,18 +5892,22 @@ static void tx_partition_set_contexts(const AV1_COMMON *const cm, void av1_update_tx_type_count(const AV1_COMMON *cm, MACROBLOCKD *xd, #if CONFIG_TXK_SEL - int block, int plane, + int blk_row, int blk_col, int block, int plane, #endif BLOCK_SIZE bsize, TX_SIZE tx_size, FRAME_COUNTS *counts) { MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; int is_inter = is_inter_block(mbmi); + #if !CONFIG_TXK_SEL TX_TYPE tx_type = mbmi->tx_type; #else + (void)blk_row; + (void)blk_col; // Only y plane's tx_type is updated if (plane > 0) return; - TX_TYPE tx_type = get_tx_type(PLANE_TYPE_Y, xd, block, tx_size); + TX_TYPE tx_type = + av1_get_tx_type(PLANE_TYPE_Y, xd, blk_row, blk_col, block, tx_size); #endif #if CONFIG_EXT_TX if (get_ext_tx_types(tx_size, bsize, is_inter, cm->reduced_tx_set_used) > 1 && @@ -5509,7 +5967,7 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td, x->pvq_coded = (dry_run == OUTPUT_ENABLED) ? 1 : 0; #endif #if CONFIG_CFL - x->cfl_store_y = (dry_run == OUTPUT_ENABLED) ? 1 : 0; + x->cfl_store_y = 1; #endif if (!is_inter) { @@ -5526,13 +5984,8 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td, #if CONFIG_PALETTE if (bsize >= BLOCK_8X8 && !dry_run) { for (plane = 0; plane <= 1; ++plane) { - if (mbmi->palette_mode_info.palette_size[plane] > 0) { - mbmi->palette_mode_info.palette_first_color_idx[plane] = - xd->plane[plane].color_index_map[0]; - // TODO(huisu): this increases the use of token buffer. Needs stretch - // test to verify. + if (mbmi->palette_mode_info.palette_size[plane] > 0) av1_tokenize_palette_sb(cpi, td, plane, t, dry_run, bsize, rate); - } } } #endif // CONFIG_PALETTE @@ -5559,9 +6012,21 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td, av1_setup_pre_planes(xd, ref, cfg, mi_row, mi_col, &xd->block_refs[ref]->sf); } - av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, NULL, block_size); +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + // Single ref compound mode + if (!is_compound && is_inter_singleref_comp_mode(mbmi->mode)) { + xd->block_refs[1] = xd->block_refs[0]; + YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mbmi->ref_frame[0]); +#if CONFIG_INTRABC + assert(IMPLIES(!is_intrabc_block(mbmi), cfg)); +#else + assert(cfg != NULL); +#endif // !CONFIG_INTRABC + av1_setup_pre_planes(xd, 1, cfg, mi_row, mi_col, &xd->block_refs[1]->sf); + } +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF - av1_build_inter_predictors_sbuv(cm, xd, mi_row, mi_col, NULL, block_size); + av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, block_size); #if CONFIG_MOTION_VAR if (mbmi->motion_mode == OBMC_CAUSAL) { #if CONFIG_NCOBMC @@ -5587,10 +6052,11 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td, #endif } -#if CONFIG_DAALA_DIST && CONFIG_CB4X4 +#if CONFIG_DIST_8X8 && CONFIG_CB4X4 if (bsize < BLOCK_8X8) { - daala_dist_set_sub8x8_dst(x, x->decoded_8x8, bsize, block_size_wide[bsize], - block_size_high[bsize], mi_row, mi_col); + dist_8x8_set_sub8x8_dst(x, (uint8_t *)x->decoded_8x8, bsize, + block_size_wide[bsize], block_size_high[bsize], + mi_row, mi_col); } #endif @@ -5629,13 +6095,16 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td, ++td->counts->tx_size[tx_size_cat][tx_size_ctx][depth]; #endif -#if CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT + +#if CONFIG_RECT_TX_EXT && (CONFIG_EXT_TX || CONFIG_VAR_TX) if (is_quarter_tx_allowed(xd, mbmi, is_inter) && - mbmi->tx_size != txsize_sqr_up_map[mbmi->tx_size]) { - ++td->counts->quarter_tx_size[mbmi->tx_size == - quarter_txsize_lookup[mbmi->sb_type]]; + quarter_txsize_lookup[bsize] != max_txsize_rect_lookup[bsize] && + (mbmi->tx_size == quarter_txsize_lookup[bsize] || + mbmi->tx_size == max_txsize_rect_lookup[bsize])) { + ++td->counts + ->quarter_tx_size[mbmi->tx_size == quarter_txsize_lookup[bsize]]; } -#endif // CONFIG_EXT_TX && CONFIG_RECT_TX && CONFIG_RECT_TX_EXT +#endif #if CONFIG_EXT_TX && CONFIG_RECT_TX assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed(xd, mbmi))); #endif // CONFIG_EXT_TX && CONFIG_RECT_TX @@ -5673,8 +6142,8 @@ static void encode_superblock(const AV1_COMP *const cpi, ThreadData *td, } ++td->counts->tx_size_totals[txsize_sqr_map[tx_size]]; - ++td->counts - ->tx_size_totals[txsize_sqr_map[get_uv_tx_size(mbmi, &xd->plane[1])]]; + ++td->counts->tx_size_totals[txsize_sqr_map[av1_get_uv_tx_size( + mbmi, &xd->plane[1])]]; #if !CONFIG_TXK_SEL av1_update_tx_type_count(cm, xd, bsize, tx_size, td->counts); #endif @@ -5837,7 +6306,7 @@ static void predict_superblock(const AV1_COMP *const cpi, ThreadData *td, #if CONFIG_EXT_INTER int mi_row_ori, int mi_col_ori, #endif // CONFIG_EXT_INTER - int mi_row_pred, int mi_col_pred, + int mi_row_pred, int mi_col_pred, int plane, BLOCK_SIZE bsize_pred, int b_sub8x8, int block) { // Used in supertx // (mi_row_ori, mi_col_ori): location for mv @@ -5859,28 +6328,39 @@ static void predict_superblock(const AV1_COMP *const cpi, ThreadData *td, &xd->block_refs[ref]->sf); } +#if CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + // Single ref compound mode + if (!is_compound && is_inter_singleref_comp_mode(mbmi->mode)) { + xd->block_refs[1] = xd->block_refs[0]; + YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mbmi->ref_frame[0]); + av1_setup_pre_planes(xd, 1, cfg, mi_row_pred, mi_col_pred, + &xd->block_refs[1]->sf); + } +#endif // CONFIG_EXT_INTER && CONFIG_COMPOUND_SINGLEREF + if (!b_sub8x8) - av1_build_inter_predictors_sb_extend(cm, xd, + av1_build_inter_predictor_sb_extend(cm, xd, #if CONFIG_EXT_INTER - mi_row_ori, mi_col_ori, + mi_row_ori, mi_col_ori, #endif // CONFIG_EXT_INTER - mi_row_pred, mi_col_pred, bsize_pred); + mi_row_pred, mi_col_pred, plane, + bsize_pred); else - av1_build_inter_predictors_sb_sub8x8_extend(cm, xd, + av1_build_inter_predictor_sb_sub8x8_extend(cm, xd, #if CONFIG_EXT_INTER - mi_row_ori, mi_col_ori, + mi_row_ori, mi_col_ori, #endif // CONFIG_EXT_INTER - mi_row_pred, mi_col_pred, - bsize_pred, block); + mi_row_pred, mi_col_pred, plane, + bsize_pred, block); } static void predict_b_extend(const AV1_COMP *const cpi, ThreadData *td, const TileInfo *const tile, int block, int mi_row_ori, int mi_col_ori, int mi_row_pred, int mi_col_pred, int mi_row_top, int mi_col_top, - uint8_t *dst_buf[3], int dst_stride[3], + int plane, uint8_t *dst_buf, int dst_stride, BLOCK_SIZE bsize_top, BLOCK_SIZE bsize_pred, - RUN_TYPE dry_run, int b_sub8x8, int bextend) { + RUN_TYPE dry_run, int b_sub8x8) { // Used in supertx // (mi_row_ori, mi_col_ori): location for mv // (mi_row_pred, mi_col_pred, bsize_pred): region to predict @@ -5905,34 +6385,27 @@ static void predict_b_extend(const AV1_COMP *const cpi, ThreadData *td, set_offsets_extend(cpi, td, tile, mi_row_pred, mi_col_pred, mi_row_ori, mi_col_ori, bsize_pred); - xd->plane[0].dst.stride = dst_stride[0]; - xd->plane[1].dst.stride = dst_stride[1]; - xd->plane[2].dst.stride = dst_stride[2]; - xd->plane[0].dst.buf = dst_buf[0] + - (r >> xd->plane[0].subsampling_y) * dst_stride[0] + - (c >> xd->plane[0].subsampling_x); - xd->plane[1].dst.buf = dst_buf[1] + - (r >> xd->plane[1].subsampling_y) * dst_stride[1] + - (c >> xd->plane[1].subsampling_x); - xd->plane[2].dst.buf = dst_buf[2] + - (r >> xd->plane[2].subsampling_y) * dst_stride[2] + - (c >> xd->plane[2].subsampling_x); + xd->plane[plane].dst.stride = dst_stride; + xd->plane[plane].dst.buf = + dst_buf + (r >> xd->plane[plane].subsampling_y) * dst_stride + + (c >> xd->plane[plane].subsampling_x); predict_superblock(cpi, td, #if CONFIG_EXT_INTER mi_row_ori, mi_col_ori, #endif // CONFIG_EXT_INTER - mi_row_pred, mi_col_pred, bsize_pred, b_sub8x8, block); + mi_row_pred, mi_col_pred, plane, bsize_pred, b_sub8x8, + block); - if (!dry_run && !bextend) + if (!dry_run && (plane == 0) && (block == 0 || !b_sub8x8)) update_stats(&cpi->common, td, mi_row_pred, mi_col_pred, 1); } static void extend_dir(const AV1_COMP *const cpi, ThreadData *td, const TileInfo *const tile, int block, BLOCK_SIZE bsize, - BLOCK_SIZE top_bsize, int mi_row, int mi_col, - int mi_row_top, int mi_col_top, RUN_TYPE dry_run, - uint8_t *dst_buf[3], int dst_stride[3], int dir) { + BLOCK_SIZE top_bsize, int mi_row_ori, int mi_col_ori, + int mi_row, int mi_col, int mi_row_top, int mi_col_top, + int plane, uint8_t *dst_buf, int dst_stride, int dir) { // dir: 0-lower, 1-upper, 2-left, 3-right // 4-lowerleft, 5-upperleft, 6-lowerright, 7-upperright MACROBLOCKD *xd = &td->mb.e_mbd; @@ -5973,10 +6446,10 @@ static void extend_dir(const AV1_COMP *const cpi, ThreadData *td, for (j = 0; j < mi_height + ext_offset; j += high_unit) for (i = 0; i < mi_width + ext_offset; i += wide_unit) - predict_b_extend(cpi, td, tile, block, mi_row, mi_col, mi_row_pred + j, - mi_col_pred + i, mi_row_top, mi_col_top, dst_buf, - dst_stride, top_bsize, extend_bsize, dry_run, b_sub8x8, - 1); + predict_b_extend(cpi, td, tile, block, mi_row_ori, mi_col_ori, + mi_row_pred + j, mi_col_pred + i, mi_row_top, + mi_col_top, plane, dst_buf, dst_stride, top_bsize, + extend_bsize, 1, b_sub8x8); } else if (dir == 2 || dir == 3) { // left and right extend_bsize = (mi_height == mi_size_high[BLOCK_8X8] || bsize < BLOCK_8X8 || yss < xss) @@ -5996,10 +6469,10 @@ static void extend_dir(const AV1_COMP *const cpi, ThreadData *td, for (j = 0; j < mi_height + ext_offset; j += high_unit) for (i = 0; i < mi_width + ext_offset; i += wide_unit) - predict_b_extend(cpi, td, tile, block, mi_row, mi_col, mi_row_pred + j, - mi_col_pred + i, mi_row_top, mi_col_top, dst_buf, - dst_stride, top_bsize, extend_bsize, dry_run, b_sub8x8, - 1); + predict_b_extend(cpi, td, tile, block, mi_row_ori, mi_col_ori, + mi_row_pred + j, mi_col_pred + i, mi_row_top, + mi_col_top, plane, dst_buf, dst_stride, top_bsize, + extend_bsize, 1, b_sub8x8); } else { extend_bsize = BLOCK_8X8; #if CONFIG_CB4X4 @@ -6018,35 +6491,24 @@ static void extend_dir(const AV1_COMP *const cpi, ThreadData *td, for (j = 0; j < mi_height + ext_offset; j += high_unit) for (i = 0; i < mi_width + ext_offset; i += wide_unit) - predict_b_extend(cpi, td, tile, block, mi_row, mi_col, mi_row_pred + j, - mi_col_pred + i, mi_row_top, mi_col_top, dst_buf, - dst_stride, top_bsize, extend_bsize, dry_run, b_sub8x8, - 1); + predict_b_extend(cpi, td, tile, block, mi_row_ori, mi_col_ori, + mi_row_pred + j, mi_col_pred + i, mi_row_top, + mi_col_top, plane, dst_buf, dst_stride, top_bsize, + extend_bsize, 1, b_sub8x8); } } static void extend_all(const AV1_COMP *const cpi, ThreadData *td, const TileInfo *const tile, int block, BLOCK_SIZE bsize, - BLOCK_SIZE top_bsize, int mi_row, int mi_col, - int mi_row_top, int mi_col_top, RUN_TYPE dry_run, - uint8_t *dst_buf[3], int dst_stride[3]) { + BLOCK_SIZE top_bsize, int mi_row_ori, int mi_col_ori, + int mi_row, int mi_col, int mi_row_top, int mi_col_top, + int plane, uint8_t *dst_buf, int dst_stride) { assert(block >= 0 && block < 4); - extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, mi_row_top, - mi_col_top, dry_run, dst_buf, dst_stride, 0); - extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, mi_row_top, - mi_col_top, dry_run, dst_buf, dst_stride, 1); - extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, mi_row_top, - mi_col_top, dry_run, dst_buf, dst_stride, 2); - extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, mi_row_top, - mi_col_top, dry_run, dst_buf, dst_stride, 3); - extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, mi_row_top, - mi_col_top, dry_run, dst_buf, dst_stride, 4); - extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, mi_row_top, - mi_col_top, dry_run, dst_buf, dst_stride, 5); - extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, mi_row_top, - mi_col_top, dry_run, dst_buf, dst_stride, 6); - extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row, mi_col, mi_row_top, - mi_col_top, dry_run, dst_buf, dst_stride, 7); + for (int i = 0; i < 8; ++i) { + extend_dir(cpi, td, tile, block, bsize, top_bsize, mi_row_ori, mi_col_ori, + mi_row, mi_col, mi_row_top, mi_col_top, plane, dst_buf, + dst_stride, i); + } } // This function generates prediction for multiple blocks, between which @@ -6140,29 +6602,36 @@ static void predict_sb_complex(const AV1_COMP *const cpi, ThreadData *td, switch (partition) { case PARTITION_NONE: assert(bsize < top_bsize); - predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, - mi_row_top, mi_col_top, dst_buf, dst_stride, top_bsize, - bsize, dry_run, 0, 0); - extend_all(cpi, td, tile, 0, bsize, top_bsize, mi_row, mi_col, mi_row_top, - mi_col_top, dry_run, dst_buf, dst_stride); + for (i = 0; i < MAX_MB_PLANE; ++i) { + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, i, dst_buf[i], dst_stride[i], + top_bsize, bsize, dry_run, 0); + extend_all(cpi, td, tile, 0, bsize, top_bsize, mi_row, mi_col, mi_row, + mi_col, mi_row_top, mi_col_top, i, dst_buf[i], + dst_stride[i]); + } break; case PARTITION_HORZ: if (bsize == BLOCK_8X8 && !unify_bsize) { - // Fisrt half - predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, - mi_row_top, mi_col_top, dst_buf, dst_stride, top_bsize, - BLOCK_8X8, dry_run, 1, 0); - if (bsize < top_bsize) - extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, - mi_row_top, mi_col_top, dry_run, dst_buf, dst_stride); - - // Second half - predict_b_extend(cpi, td, tile, 2, mi_row, mi_col, mi_row, mi_col, - mi_row_top, mi_col_top, dst_buf1, dst_stride1, - top_bsize, BLOCK_8X8, dry_run, 1, 1); - if (bsize < top_bsize) - extend_all(cpi, td, tile, 2, subsize, top_bsize, mi_row, mi_col, - mi_row_top, mi_col_top, dry_run, dst_buf1, dst_stride1); + for (i = 0; i < MAX_MB_PLANE; ++i) { + // First half + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, i, dst_buf[i], dst_stride[i], + top_bsize, BLOCK_8X8, dry_run, 1); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf[i], + dst_stride[i]); + + // Second half + predict_b_extend(cpi, td, tile, 2, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, i, dst_buf1[i], + dst_stride1[i], top_bsize, BLOCK_8X8, dry_run, 1); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 2, subsize, top_bsize, mi_row, mi_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf1[i], + dst_stride1[i]); + } // Smooth xd->plane[0].dst.buf = dst_buf[0]; @@ -6172,60 +6641,89 @@ static void predict_sb_complex(const AV1_COMP *const cpi, ThreadData *td, mi_col, mi_row_top, mi_col_top, bsize, top_bsize, PARTITION_HORZ, 0); } else { - // First half - predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, - mi_row_top, mi_col_top, dst_buf, dst_stride, top_bsize, - subsize, dry_run, 0, 0); - if (bsize < top_bsize) - extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, - mi_row_top, mi_col_top, dry_run, dst_buf, dst_stride); - else - extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, - mi_row_top, mi_col_top, dry_run, dst_buf, dst_stride, 0); - - if (mi_row + hbs < cm->mi_rows) { - // Second half - predict_b_extend(cpi, td, tile, 0, mi_row + hbs, mi_col, mi_row + hbs, - mi_col, mi_row_top, mi_col_top, dst_buf1, - dst_stride1, top_bsize, subsize, dry_run, 0, 0); - if (bsize < top_bsize) - extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs, - mi_col, mi_row_top, mi_col_top, dry_run, dst_buf1, - dst_stride1); - else - extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs, - mi_col, mi_row_top, mi_col_top, dry_run, dst_buf1, - dst_stride1, 1); - - // Smooth - for (i = 0; i < MAX_MB_PLANE; i++) { + for (i = 0; i < MAX_MB_PLANE; ++i) { +#if CONFIG_CB4X4 + const struct macroblockd_plane *pd = &xd->plane[i]; + int handle_chroma_sub8x8 = need_handle_chroma_sub8x8( + subsize, pd->subsampling_x, pd->subsampling_y); + + if (handle_chroma_sub8x8) { + int mode_offset_row = CONFIG_CHROMA_SUB8X8 ? hbs : 0; + + predict_b_extend(cpi, td, tile, 0, mi_row + mode_offset_row, mi_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, + dst_buf[i], dst_stride[i], top_bsize, bsize, + dry_run, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, bsize, top_bsize, + mi_row + mode_offset_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, i, dst_buf[i], dst_stride[i]); + } else { +#endif + // First half + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, i, dst_buf[i], + dst_stride[i], top_bsize, subsize, dry_run, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf[i], + dst_stride[i]); + else + extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf[i], + dst_stride[i], 0); xd->plane[i].dst.buf = dst_buf[i]; xd->plane[i].dst.stride = dst_stride[i]; - av1_build_masked_inter_predictor_complex( - xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], - mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize, - PARTITION_HORZ, i); + + if (mi_row + hbs < cm->mi_rows) { + // Second half + predict_b_extend(cpi, td, tile, 0, mi_row + hbs, mi_col, + mi_row + hbs, mi_col, mi_row_top, mi_col_top, i, + dst_buf1[i], dst_stride1[i], top_bsize, subsize, + dry_run, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs, + mi_col, mi_row + hbs, mi_col, mi_row_top, mi_col_top, + i, dst_buf1[i], dst_stride1[i]); + else + extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs, + mi_col, mi_row + hbs, mi_col, mi_row_top, mi_col_top, + i, dst_buf1[i], dst_stride1[i], 1); + // Smooth + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + av1_build_masked_inter_predictor_complex( + xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], + mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize, + PARTITION_HORZ, i); + } +#if CONFIG_CB4X4 } +#endif } } break; case PARTITION_VERT: if (bsize == BLOCK_8X8 && !unify_bsize) { - // First half - predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, - mi_row_top, mi_col_top, dst_buf, dst_stride, top_bsize, - BLOCK_8X8, dry_run, 1, 0); - if (bsize < top_bsize) - extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, - mi_row_top, mi_col_top, dry_run, dst_buf, dst_stride); - - // Second half - predict_b_extend(cpi, td, tile, 1, mi_row, mi_col, mi_row, mi_col, - mi_row_top, mi_col_top, dst_buf1, dst_stride1, - top_bsize, BLOCK_8X8, dry_run, 1, 1); - if (bsize < top_bsize) - extend_all(cpi, td, tile, 1, subsize, top_bsize, mi_row, mi_col, - mi_row_top, mi_col_top, dry_run, dst_buf1, dst_stride1); + for (i = 0; i < MAX_MB_PLANE; ++i) { + // First half + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, i, dst_buf[i], dst_stride[i], + top_bsize, BLOCK_8X8, dry_run, 1); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf[i], + dst_stride[i]); + + // Second half + predict_b_extend(cpi, td, tile, 1, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, i, dst_buf1[i], + dst_stride1[i], top_bsize, BLOCK_8X8, dry_run, 1); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 1, subsize, top_bsize, mi_row, mi_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf1[i], + dst_stride1[i]); + } // Smooth xd->plane[0].dst.buf = dst_buf[0]; @@ -6235,66 +6733,160 @@ static void predict_sb_complex(const AV1_COMP *const cpi, ThreadData *td, mi_col, mi_row_top, mi_col_top, bsize, top_bsize, PARTITION_VERT, 0); } else { - // bsize: not important, not useful - predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, - mi_row_top, mi_col_top, dst_buf, dst_stride, top_bsize, - subsize, dry_run, 0, 0); - if (bsize < top_bsize) - extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, - mi_row_top, mi_col_top, dry_run, dst_buf, dst_stride); - else - extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, - mi_row_top, mi_col_top, dry_run, dst_buf, dst_stride, 3); - - if (mi_col + hbs < cm->mi_cols) { - predict_b_extend(cpi, td, tile, 0, mi_row, mi_col + hbs, mi_row, - mi_col + hbs, mi_row_top, mi_col_top, dst_buf1, - dst_stride1, top_bsize, subsize, dry_run, 0, 0); - if (bsize < top_bsize) - extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, - mi_col + hbs, mi_row_top, mi_col_top, dry_run, dst_buf1, - dst_stride1); - else - extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, - mi_col + hbs, mi_row_top, mi_col_top, dry_run, dst_buf1, - dst_stride1, 2); - - for (i = 0; i < MAX_MB_PLANE; i++) { + for (i = 0; i < MAX_MB_PLANE; ++i) { +#if CONFIG_CB4X4 + const struct macroblockd_plane *pd = &xd->plane[i]; + int handle_chroma_sub8x8 = need_handle_chroma_sub8x8( + subsize, pd->subsampling_x, pd->subsampling_y); + + if (handle_chroma_sub8x8) { + int mode_offset_col = CONFIG_CHROMA_SUB8X8 ? hbs : 0; + + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col + mode_offset_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, + dst_buf[i], dst_stride[i], top_bsize, bsize, + dry_run, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, bsize, top_bsize, mi_row, + mi_col + mode_offset_col, mi_row, mi_col, mi_row_top, + mi_col_top, i, dst_buf[i], dst_stride[i]); + } else { +#endif + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, i, dst_buf[i], + dst_stride[i], top_bsize, subsize, dry_run, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf[i], + dst_stride[i]); + else + extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf[i], + dst_stride[i], 3); xd->plane[i].dst.buf = dst_buf[i]; xd->plane[i].dst.stride = dst_stride[i]; - av1_build_masked_inter_predictor_complex( - xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], - mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize, - PARTITION_VERT, i); + + if (mi_col + hbs < cm->mi_cols) { + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col + hbs, mi_row, + mi_col + hbs, mi_row_top, mi_col_top, i, + dst_buf1[i], dst_stride1[i], top_bsize, subsize, + dry_run, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, + mi_col + hbs, mi_row, mi_col + hbs, mi_row_top, + mi_col_top, i, dst_buf1[i], dst_stride1[i]); + else + extend_dir(cpi, td, tile, 0, subsize, top_bsize, mi_row, + mi_col + hbs, mi_row, mi_col + hbs, mi_row_top, + mi_col_top, i, dst_buf1[i], dst_stride1[i], 2); + + // smooth + xd->plane[i].dst.buf = dst_buf[i]; + xd->plane[i].dst.stride = dst_stride[i]; + av1_build_masked_inter_predictor_complex( + xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], + mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize, + PARTITION_VERT, i); + } +#if CONFIG_CB4X4 } +#endif } } break; case PARTITION_SPLIT: if (bsize == BLOCK_8X8 && !unify_bsize) { - predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, - mi_row_top, mi_col_top, dst_buf, dst_stride, top_bsize, - BLOCK_8X8, dry_run, 1, 0); - predict_b_extend(cpi, td, tile, 1, mi_row, mi_col, mi_row, mi_col, - mi_row_top, mi_col_top, dst_buf1, dst_stride1, - top_bsize, BLOCK_8X8, dry_run, 1, 1); - predict_b_extend(cpi, td, tile, 2, mi_row, mi_col, mi_row, mi_col, - mi_row_top, mi_col_top, dst_buf2, dst_stride2, - top_bsize, BLOCK_8X8, dry_run, 1, 1); - predict_b_extend(cpi, td, tile, 3, mi_row, mi_col, mi_row, mi_col, - mi_row_top, mi_col_top, dst_buf3, dst_stride3, - top_bsize, BLOCK_8X8, dry_run, 1, 1); - - if (bsize < top_bsize) { - extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, - mi_row_top, mi_col_top, dry_run, dst_buf, dst_stride); - extend_all(cpi, td, tile, 1, subsize, top_bsize, mi_row, mi_col, - mi_row_top, mi_col_top, dry_run, dst_buf1, dst_stride1); - extend_all(cpi, td, tile, 2, subsize, top_bsize, mi_row, mi_col, - mi_row_top, mi_col_top, dry_run, dst_buf2, dst_stride2); - extend_all(cpi, td, tile, 3, subsize, top_bsize, mi_row, mi_col, - mi_row_top, mi_col_top, dry_run, dst_buf3, dst_stride3); + for (i = 0; i < MAX_MB_PLANE; i++) { + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, i, dst_buf[i], dst_stride[i], + top_bsize, BLOCK_8X8, dry_run, 1); + predict_b_extend(cpi, td, tile, 1, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, i, dst_buf1[i], + dst_stride1[i], top_bsize, BLOCK_8X8, dry_run, 1); + predict_b_extend(cpi, td, tile, 2, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, i, dst_buf2[i], + dst_stride2[i], top_bsize, BLOCK_8X8, dry_run, 1); + predict_b_extend(cpi, td, tile, 3, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, i, dst_buf3[i], + dst_stride3[i], top_bsize, BLOCK_8X8, dry_run, 1); + + if (bsize < top_bsize) { + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf[i], + dst_stride[i]); + extend_all(cpi, td, tile, 1, subsize, top_bsize, mi_row, mi_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf1[i], + dst_stride1[i]); + extend_all(cpi, td, tile, 2, subsize, top_bsize, mi_row, mi_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf2[i], + dst_stride2[i]); + extend_all(cpi, td, tile, 3, subsize, top_bsize, mi_row, mi_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf3[i], + dst_stride3[i]); + } + } +#if CONFIG_CB4X4 + } else if (bsize == BLOCK_8X8) { + for (i = 0; i < MAX_MB_PLANE; i++) { + const struct macroblockd_plane *pd = &xd->plane[i]; + int handle_chroma_sub8x8 = need_handle_chroma_sub8x8( + subsize, pd->subsampling_x, pd->subsampling_y); + + if (handle_chroma_sub8x8) { + int mode_offset_row = + CONFIG_CHROMA_SUB8X8 && mi_row + hbs < cm->mi_rows ? hbs : 0; + int mode_offset_col = + CONFIG_CHROMA_SUB8X8 && mi_col + hbs < cm->mi_cols ? hbs : 0; + + predict_b_extend(cpi, td, tile, 0, mi_row + mode_offset_row, + mi_col + mode_offset_col, mi_row, mi_col, + mi_row_top, mi_col_top, i, dst_buf[i], + dst_stride[i], top_bsize, BLOCK_8X8, dry_run, 0); + if (bsize < top_bsize) + extend_all(cpi, td, tile, 0, BLOCK_8X8, top_bsize, + mi_row + mode_offset_row, mi_col + mode_offset_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf[i], + dst_stride[i]); + } else { + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col, mi_row, mi_col, + mi_row_top, mi_col_top, i, dst_buf[i], + dst_stride[i], top_bsize, subsize, dry_run, 0); + if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) + predict_b_extend(cpi, td, tile, 0, mi_row, mi_col + hbs, mi_row, + mi_col + hbs, mi_row_top, mi_col_top, i, + dst_buf1[i], dst_stride1[i], top_bsize, subsize, + dry_run, 0); + if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) + predict_b_extend(cpi, td, tile, 0, mi_row + hbs, mi_col, + mi_row + hbs, mi_col, mi_row_top, mi_col_top, i, + dst_buf2[i], dst_stride2[i], top_bsize, subsize, + dry_run, 0); + if (mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols) + predict_b_extend(cpi, td, tile, 0, mi_row + hbs, mi_col + hbs, + mi_row + hbs, mi_col + hbs, mi_row_top, + mi_col_top, i, dst_buf3[i], dst_stride3[i], + top_bsize, subsize, dry_run, 0); + + if (bsize < top_bsize) { + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, mi_col, + mi_row, mi_col, mi_row_top, mi_col_top, i, dst_buf[i], + dst_stride[i]); + if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row, + mi_col + hbs, mi_row, mi_col + hbs, mi_row_top, + mi_col_top, i, dst_buf1[i], dst_stride1[i]); + if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs, + mi_col, mi_row + hbs, mi_col, mi_row_top, mi_col_top, + i, dst_buf2[i], dst_stride2[i]); + if (mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols) + extend_all(cpi, td, tile, 0, subsize, top_bsize, mi_row + hbs, + mi_col + hbs, mi_row + hbs, mi_col + hbs, mi_row_top, + mi_col_top, i, dst_buf3[i], dst_stride3[i]); + } + } } +#endif } else { predict_sb_complex(cpi, td, tile, mi_row, mi_col, mi_row_top, mi_col_top, dry_run, subsize, top_bsize, dst_buf, @@ -6314,10 +6906,16 @@ static void predict_sb_complex(const AV1_COMP *const cpi, ThreadData *td, pc_tree->split[3]); } for (i = 0; i < MAX_MB_PLANE; i++) { -#if !CONFIG_CB4X4 +#if CONFIG_CB4X4 + const struct macroblockd_plane *pd = &xd->plane[i]; + int handle_chroma_sub8x8 = need_handle_chroma_sub8x8( + subsize, pd->subsampling_x, pd->subsampling_y); + if (handle_chroma_sub8x8) continue; // Skip <4x4 chroma smoothing +#else if (bsize == BLOCK_8X8 && i != 0) continue; // Skip <4x4 chroma smoothing #endif + if (mi_row < cm->mi_rows && mi_col + hbs < cm->mi_cols) { av1_build_masked_inter_predictor_complex( xd, dst_buf[i], dst_stride[i], dst_buf1[i], dst_stride1[i], @@ -6334,9 +6932,6 @@ static void predict_sb_complex(const AV1_COMP *const cpi, ThreadData *td, PARTITION_HORZ, i); } } else if (mi_row + hbs < cm->mi_rows && mi_col < cm->mi_cols) { - if (bsize == BLOCK_8X8 && i != 0) - continue; // Skip <4x4 chroma smoothing - av1_build_masked_inter_predictor_complex( xd, dst_buf[i], dst_stride[i], dst_buf2[i], dst_stride2[i], mi_row, mi_col, mi_row_top, mi_col_top, bsize, top_bsize, @@ -6660,8 +7255,7 @@ static void rd_supertx_sb(const AV1_COMP *const cpi, ThreadData *td, *tmp_rate = av1_cost_bit(av1_get_skip_prob(cm, xd), 1); x->skip = 1; } else { - if (RDCOST(x->rdmult, x->rddiv, *tmp_rate, *tmp_dist) < - RDCOST(x->rdmult, x->rddiv, 0, sse)) { + if (RDCOST(x->rdmult, *tmp_rate, *tmp_dist) < RDCOST(x->rdmult, 0, sse)) { *tmp_rate += av1_cost_bit(av1_get_skip_prob(cm, xd), 0); x->skip = 0; } else { @@ -6671,7 +7265,7 @@ static void rd_supertx_sb(const AV1_COMP *const cpi, ThreadData *td, } } *tmp_rate += base_rate; - rd_tx = RDCOST(x->rdmult, x->rddiv, *tmp_rate, *tmp_dist); + rd_tx = RDCOST(x->rdmult, *tmp_rate, *tmp_dist); if (rd_tx < bestrd_tx * 0.99 || tx_type == DCT_DCT) { *best_tx = tx_type; bestrd_tx = rd_tx; |