diff options
Diffstat (limited to 'third_party/aom/av1/decoder/decodeframe.c')
-rw-r--r-- | third_party/aom/av1/decoder/decodeframe.c | 555 |
1 files changed, 423 insertions, 132 deletions
diff --git a/third_party/aom/av1/decoder/decodeframe.c b/third_party/aom/av1/decoder/decodeframe.c index 6dbc4f3eb..31f14b531 100644 --- a/third_party/aom/av1/decoder/decodeframe.c +++ b/third_party/aom/av1/decoder/decodeframe.c @@ -43,6 +43,7 @@ #include "av1/common/entropy.h" #include "av1/common/entropymode.h" #include "av1/common/entropymv.h" +#include "av1/common/frame_buffers.h" #include "av1/common/idct.h" #include "av1/common/mvref_common.h" #include "av1/common/pred_common.h" @@ -87,18 +88,25 @@ int av1_check_trailing_bits(AV1Decoder *pbi, struct aom_read_bit_buffer *rb) { static void set_planes_to_neutral_grey(const SequenceHeader *const seq_params, const YV12_BUFFER_CONFIG *const buf, int only_chroma) { - const int val = 1 << (seq_params->bit_depth - 1); - - for (int plane = only_chroma; plane < MAX_MB_PLANE; plane++) { - const int is_uv = plane > 0; - for (int row_idx = 0; row_idx < buf->crop_heights[is_uv]; row_idx++) { - if (seq_params->use_highbitdepth) { - // TODO(yaowu): replace this with aom_memset16() for speed - for (int col_idx = 0; col_idx < buf->crop_widths[is_uv]; col_idx++) { - uint16_t *base = CONVERT_TO_SHORTPTR(buf->buffers[plane]); - base[row_idx * buf->strides[is_uv] + col_idx] = val; + if (seq_params->use_highbitdepth) { + const int val = 1 << (seq_params->bit_depth - 1); + for (int plane = only_chroma; plane < MAX_MB_PLANE; plane++) { + const int is_uv = plane > 0; + uint16_t *const base = CONVERT_TO_SHORTPTR(buf->buffers[plane]); + // Set the first row to neutral grey. Then copy the first row to all + // subsequent rows. + if (buf->crop_heights[is_uv] > 0) { + aom_memset16(base, val, buf->crop_widths[is_uv]); + for (int row_idx = 1; row_idx < buf->crop_heights[is_uv]; row_idx++) { + memcpy(&base[row_idx * buf->strides[is_uv]], base, + sizeof(*base) * buf->crop_widths[is_uv]); } - } else { + } + } + } else { + for (int plane = only_chroma; plane < MAX_MB_PLANE; plane++) { + const int is_uv = plane > 0; + for (int row_idx = 0; row_idx < buf->crop_heights[is_uv]; row_idx++) { memset(&buf->buffers[plane][row_idx * buf->uv_stride], 1 << 7, buf->crop_widths[is_uv]); } @@ -687,11 +695,10 @@ static INLINE void dec_build_inter_predictors(const AV1_COMMON *cm, for (int x = 0; x < b8_w; x += b4_w) { MB_MODE_INFO *this_mbmi = xd->mi[row * xd->mi_stride + col]; is_compound = has_second_ref(this_mbmi); - DECLARE_ALIGNED(32, CONV_BUF_TYPE, tmp_dst[8 * 8]); int tmp_dst_stride = 8; assert(bw < 8 || bh < 8); ConvolveParams conv_params = get_conv_params_no_round( - 0, 0, plane, tmp_dst, tmp_dst_stride, is_compound, xd->bd); + 0, plane, xd->tmp_conv_dst, tmp_dst_stride, is_compound, xd->bd); conv_params.use_jnt_comp_avg = 0; struct buf_2d *const dst_buf = &pd->dst; uint8_t *dst = dst_buf->buf + dst_buf->stride * y + x; @@ -735,7 +742,6 @@ static INLINE void dec_build_inter_predictors(const AV1_COMMON *cm, extend_mc_border(sf, pre_buf, scaled_mv, block, subpel_x_mv, subpel_y_mv, 0, is_intrabc, highbd, xd->mc_buf[ref], &pre, &src_stride); - conv_params.ref = ref; conv_params.do_average = ref; if (is_masked_compound_type(mi->interinter_comp.type)) { // masked compound type has its own average mechanism @@ -762,7 +768,6 @@ static INLINE void dec_build_inter_predictors(const AV1_COMMON *cm, uint8_t *const dst = dst_buf->buf; uint8_t *pre[2]; SubpelParams subpel_params[2]; - DECLARE_ALIGNED(32, uint16_t, tmp_dst[MAX_SB_SIZE * MAX_SB_SIZE]); int src_stride[2]; for (ref = 0; ref < 1 + is_compound; ++ref) { const struct scale_factors *const sf = @@ -797,7 +802,7 @@ static INLINE void dec_build_inter_predictors(const AV1_COMMON *cm, } ConvolveParams conv_params = get_conv_params_no_round( - 0, 0, plane, tmp_dst, MAX_SB_SIZE, is_compound, xd->bd); + 0, plane, xd->tmp_conv_dst, MAX_SB_SIZE, is_compound, xd->bd); av1_jnt_comp_weight_assign(cm, mi, 0, &conv_params.fwd_offset, &conv_params.bck_offset, &conv_params.use_jnt_comp_avg, is_compound); @@ -808,7 +813,6 @@ static INLINE void dec_build_inter_predictors(const AV1_COMMON *cm, WarpTypesAllowed warp_types; warp_types.global_warp_allowed = is_global[ref]; warp_types.local_warp_allowed = mi->motion_mode == WARPED_CAUSAL; - conv_params.ref = ref; conv_params.do_average = ref; if (is_masked_compound_type(mi->interinter_comp.type)) { // masked compound type has its own average mechanism @@ -931,7 +935,7 @@ static void dec_build_prediction_by_above_preds( // Adjust mb_to_bottom_edge to have the correct value for the OBMC // prediction block. This is half the height of the original block, // except for 128-wide blocks, where we only use a height of 32. - int this_height = xd->n8_h * MI_SIZE; + int this_height = xd->n4_h * MI_SIZE; int pred_height = AOMMIN(this_height / 2, 32); xd->mb_to_bottom_edge += (this_height - pred_height) * 8; @@ -984,7 +988,7 @@ static void dec_build_prediction_by_left_preds( // Adjust mb_to_right_edge to have the correct value for the OBMC // prediction block. This is half the width of the original block, // except for 128-wide blocks, where we only use a width of 32. - int this_width = xd->n8_w * MI_SIZE; + int this_width = xd->n4_w * MI_SIZE; int pred_width = AOMMIN(this_width / 2, 32); xd->mb_to_right_edge += (this_width - pred_width) * 8; @@ -1006,8 +1010,6 @@ static void dec_build_obmc_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col) { const int num_planes = av1_num_planes(cm); - DECLARE_ALIGNED(16, uint8_t, tmp_buf1[2 * MAX_MB_PLANE * MAX_SB_SQUARE]); - DECLARE_ALIGNED(16, uint8_t, tmp_buf2[2 * MAX_MB_PLANE * MAX_SB_SQUARE]); uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE]; int dst_stride1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; int dst_stride2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; @@ -1018,19 +1020,23 @@ static void dec_build_obmc_inter_predictors_sb(const AV1_COMMON *cm, if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { int len = sizeof(uint16_t); - dst_buf1[0] = CONVERT_TO_BYTEPTR(tmp_buf1); - dst_buf1[1] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * len); - dst_buf1[2] = CONVERT_TO_BYTEPTR(tmp_buf1 + MAX_SB_SQUARE * 2 * len); - dst_buf2[0] = CONVERT_TO_BYTEPTR(tmp_buf2); - dst_buf2[1] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * len); - dst_buf2[2] = CONVERT_TO_BYTEPTR(tmp_buf2 + MAX_SB_SQUARE * 2 * len); + dst_buf1[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0]); + dst_buf1[1] = + CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * len); + dst_buf1[2] = + CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * 2 * len); + dst_buf2[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1]); + dst_buf2[1] = + CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * len); + dst_buf2[2] = + CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2 * len); } else { - dst_buf1[0] = tmp_buf1; - dst_buf1[1] = tmp_buf1 + MAX_SB_SQUARE; - dst_buf1[2] = tmp_buf1 + MAX_SB_SQUARE * 2; - dst_buf2[0] = tmp_buf2; - dst_buf2[1] = tmp_buf2 + MAX_SB_SQUARE; - dst_buf2[2] = tmp_buf2 + MAX_SB_SQUARE * 2; + dst_buf1[0] = xd->tmp_obmc_bufs[0]; + dst_buf1[1] = xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE; + dst_buf1[2] = xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * 2; + dst_buf2[0] = xd->tmp_obmc_bufs[1]; + dst_buf2[1] = xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE; + dst_buf2[2] = xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2; } dec_build_prediction_by_above_preds(cm, xd, mi_row, mi_col, dst_buf1, dst_width1, dst_height1, dst_stride1); @@ -1069,8 +1075,9 @@ static void predict_inter_block(AV1_COMMON *const cm, MACROBLOCKD *const xd, } dec_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize); - if (mbmi->motion_mode == OBMC_CAUSAL) + if (mbmi->motion_mode == OBMC_CAUSAL) { dec_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col); + } #if CONFIG_MISMATCH_DEBUG for (int plane = 0; plane < num_planes; ++plane) { const struct macroblockd_plane *pd = &xd->plane[plane]; @@ -1225,9 +1232,18 @@ static void decode_token_recon_block(AV1Decoder *const pbi, set_color_index_map_offset); } +#if LOOP_FILTER_BITMASK +static void store_bitmask_vartx(AV1_COMMON *cm, int mi_row, int mi_col, + BLOCK_SIZE bsize, TX_SIZE tx_size, + MB_MODE_INFO *mbmi); +#endif + static void read_tx_size_vartx(MACROBLOCKD *xd, MB_MODE_INFO *mbmi, - TX_SIZE tx_size, int depth, int blk_row, - int blk_col, aom_reader *r) { + TX_SIZE tx_size, int depth, +#if LOOP_FILTER_BITMASK + AV1_COMMON *cm, int mi_row, int mi_col, +#endif + int blk_row, int blk_col, aom_reader *r) { FRAME_CONTEXT *ec_ctx = xd->tile_ctx; int is_split = 0; const BLOCK_SIZE bsize = mbmi->sb_type; @@ -1271,15 +1287,29 @@ static void read_tx_size_vartx(MACROBLOCKD *xd, MB_MODE_INFO *mbmi, mbmi->tx_size = sub_txs; txfm_partition_update(xd->above_txfm_context + blk_col, xd->left_txfm_context + blk_row, sub_txs, tx_size); +#if LOOP_FILTER_BITMASK + store_bitmask_vartx(cm, mi_row + blk_row, mi_col + blk_col, BLOCK_8X8, + TX_4X4, mbmi); +#endif return; } +#if LOOP_FILTER_BITMASK + if (depth + 1 == MAX_VARTX_DEPTH) { + store_bitmask_vartx(cm, mi_row + blk_row, mi_col + blk_col, + txsize_to_bsize[tx_size], sub_txs, mbmi); + } +#endif assert(bsw > 0 && bsh > 0); for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) { for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) { int offsetr = blk_row + row; int offsetc = blk_col + col; - read_tx_size_vartx(xd, mbmi, sub_txs, depth + 1, offsetr, offsetc, r); + read_tx_size_vartx(xd, mbmi, sub_txs, depth + 1, +#if LOOP_FILTER_BITMASK + cm, mi_row, mi_col, +#endif + offsetr, offsetc, r); } } } else { @@ -1293,6 +1323,10 @@ static void read_tx_size_vartx(MACROBLOCKD *xd, MB_MODE_INFO *mbmi, mbmi->tx_size = tx_size; txfm_partition_update(xd->above_txfm_context + blk_col, xd->left_txfm_context + blk_row, tx_size, tx_size); +#if LOOP_FILTER_BITMASK + store_bitmask_vartx(cm, mi_row + blk_row, mi_col + blk_col, + txsize_to_bsize[tx_size], tx_size, mbmi); +#endif } } @@ -1330,6 +1364,191 @@ static TX_SIZE read_tx_size(AV1_COMMON *cm, MACROBLOCKD *xd, int is_inter, } } +#if LOOP_FILTER_BITMASK +static void store_bitmask_vartx(AV1_COMMON *cm, int mi_row, int mi_col, + BLOCK_SIZE bsize, TX_SIZE tx_size, + MB_MODE_INFO *mbmi) { + LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col); + const TX_SIZE tx_size_y_vert = txsize_vert_map[tx_size]; + const TX_SIZE tx_size_y_horz = txsize_horz_map[tx_size]; + const TX_SIZE tx_size_uv_vert = txsize_vert_map[av1_get_max_uv_txsize( + mbmi->sb_type, cm->seq_params.subsampling_x, + cm->seq_params.subsampling_y)]; + const TX_SIZE tx_size_uv_horz = txsize_horz_map[av1_get_max_uv_txsize( + mbmi->sb_type, cm->seq_params.subsampling_x, + cm->seq_params.subsampling_y)]; + const int is_square_transform_size = tx_size <= TX_64X64; + int mask_id = 0; + int offset = 0; + const int half_ratio_tx_size_max32 = + (tx_size > TX_64X64) & (tx_size <= TX_32X16); + if (is_square_transform_size) { + switch (tx_size) { + case TX_4X4: mask_id = mask_id_table_tx_4x4[bsize]; break; + case TX_8X8: + mask_id = mask_id_table_tx_8x8[bsize]; + offset = 19; + break; + case TX_16X16: + mask_id = mask_id_table_tx_16x16[bsize]; + offset = 33; + break; + case TX_32X32: + mask_id = mask_id_table_tx_32x32[bsize]; + offset = 42; + break; + case TX_64X64: mask_id = 46; break; + default: assert(!is_square_transform_size); return; + } + mask_id += offset; + } else if (half_ratio_tx_size_max32) { + int tx_size_equal_block_size = bsize == txsize_to_bsize[tx_size]; + mask_id = 47 + 2 * (tx_size - TX_4X8) + (tx_size_equal_block_size ? 0 : 1); + } else if (tx_size == TX_32X64) { + mask_id = 59; + } else if (tx_size == TX_64X32) { + mask_id = 60; + } else { // quarter ratio tx size + mask_id = 61 + (tx_size - TX_4X16); + } + int index = 0; + const int row = mi_row % MI_SIZE_64X64; + const int col = mi_col % MI_SIZE_64X64; + const int shift = get_index_shift(col, row, &index); + const int vert_shift = tx_size_y_vert <= TX_8X8 ? shift : col; + for (int i = 0; i + index < 4; ++i) { + // y vertical. + lfm->tx_size_ver[0][tx_size_y_horz].bits[i + index] |= + (left_mask_univariant_reordered[mask_id].bits[i] << vert_shift); + // y horizontal. + lfm->tx_size_hor[0][tx_size_y_vert].bits[i + index] |= + (above_mask_univariant_reordered[mask_id].bits[i] << shift); + // u/v vertical. + lfm->tx_size_ver[1][tx_size_uv_horz].bits[i + index] |= + (left_mask_univariant_reordered[mask_id].bits[i] << vert_shift); + // u/v horizontal. + lfm->tx_size_hor[1][tx_size_uv_vert].bits[i + index] |= + (above_mask_univariant_reordered[mask_id].bits[i] << shift); + } +} + +static void store_bitmask_univariant_tx(AV1_COMMON *cm, int mi_row, int mi_col, + BLOCK_SIZE bsize, MB_MODE_INFO *mbmi) { + // Use a lookup table that provides one bitmask for a given block size and + // a univariant transform size. + int index; + int shift; + int row; + int col; + LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col); + const TX_SIZE tx_size_y_vert = txsize_vert_map[mbmi->tx_size]; + const TX_SIZE tx_size_y_horz = txsize_horz_map[mbmi->tx_size]; + const TX_SIZE tx_size_uv_vert = txsize_vert_map[av1_get_max_uv_txsize( + mbmi->sb_type, cm->seq_params.subsampling_x, + cm->seq_params.subsampling_y)]; + const TX_SIZE tx_size_uv_horz = txsize_horz_map[av1_get_max_uv_txsize( + mbmi->sb_type, cm->seq_params.subsampling_x, + cm->seq_params.subsampling_y)]; + const int is_square_transform_size = mbmi->tx_size <= TX_64X64; + int mask_id = 0; + int offset = 0; + const int half_ratio_tx_size_max32 = + (mbmi->tx_size > TX_64X64) & (mbmi->tx_size <= TX_32X16); + if (is_square_transform_size) { + switch (mbmi->tx_size) { + case TX_4X4: mask_id = mask_id_table_tx_4x4[bsize]; break; + case TX_8X8: + mask_id = mask_id_table_tx_8x8[bsize]; + offset = 19; + break; + case TX_16X16: + mask_id = mask_id_table_tx_16x16[bsize]; + offset = 33; + break; + case TX_32X32: + mask_id = mask_id_table_tx_32x32[bsize]; + offset = 42; + break; + case TX_64X64: mask_id = 46; break; + default: assert(!is_square_transform_size); return; + } + mask_id += offset; + } else if (half_ratio_tx_size_max32) { + int tx_size_equal_block_size = bsize == txsize_to_bsize[mbmi->tx_size]; + mask_id = + 47 + 2 * (mbmi->tx_size - TX_4X8) + (tx_size_equal_block_size ? 0 : 1); + } else if (mbmi->tx_size == TX_32X64) { + mask_id = 59; + } else if (mbmi->tx_size == TX_64X32) { + mask_id = 60; + } else { // quarter ratio tx size + mask_id = 61 + (mbmi->tx_size - TX_4X16); + } + row = mi_row % MI_SIZE_64X64; + col = mi_col % MI_SIZE_64X64; + shift = get_index_shift(col, row, &index); + const int vert_shift = tx_size_y_vert <= TX_8X8 ? shift : col; + for (int i = 0; i + index < 4; ++i) { + // y vertical. + lfm->tx_size_ver[0][tx_size_y_horz].bits[i + index] |= + (left_mask_univariant_reordered[mask_id].bits[i] << vert_shift); + // y horizontal. + lfm->tx_size_hor[0][tx_size_y_vert].bits[i + index] |= + (above_mask_univariant_reordered[mask_id].bits[i] << shift); + // u/v vertical. + lfm->tx_size_ver[1][tx_size_uv_horz].bits[i + index] |= + (left_mask_univariant_reordered[mask_id].bits[i] << vert_shift); + // u/v horizontal. + lfm->tx_size_hor[1][tx_size_uv_vert].bits[i + index] |= + (above_mask_univariant_reordered[mask_id].bits[i] << shift); + } +} + +static void store_bitmask_other_info(AV1_COMMON *cm, int mi_row, int mi_col, + BLOCK_SIZE bsize, MB_MODE_INFO *mbmi) { + int index; + int shift; + int row; + LoopFilterMask *lfm = get_loop_filter_mask(cm, mi_row, mi_col); + const int row_start = mi_row % MI_SIZE_64X64; + const int col_start = mi_col % MI_SIZE_64X64; + shift = get_index_shift(col_start, row_start, &index); + const uint64_t top_edge_mask = + ((uint64_t)1 << (shift + mi_size_wide[bsize])) - ((uint64_t)1 << shift); + lfm->is_horz_border.bits[index] |= top_edge_mask; + const int is_vert_border = mask_id_table_vert_border[bsize]; + const int vert_shift = block_size_high[bsize] <= 8 ? shift : col_start; + for (int i = 0; i + index < 4; ++i) { + lfm->is_vert_border.bits[i + index] |= + (left_mask_univariant_reordered[is_vert_border].bits[i] << vert_shift); + } + const int is_skip = mbmi->skip && is_inter_block(mbmi); + if (is_skip) { + const int is_skip_mask = mask_id_table_tx_4x4[bsize]; + for (int i = 0; i + index < 4; ++i) { + lfm->skip.bits[i + index] |= + (above_mask_univariant_reordered[is_skip_mask].bits[i] << shift); + } + } + const uint8_t level_vert_y = get_filter_level(cm, &cm->lf_info, 0, 0, mbmi); + const uint8_t level_horz_y = get_filter_level(cm, &cm->lf_info, 1, 0, mbmi); + const uint8_t level_u = get_filter_level(cm, &cm->lf_info, 0, 1, mbmi); + const uint8_t level_v = get_filter_level(cm, &cm->lf_info, 0, 2, mbmi); + for (int r = mi_row; r < mi_row + mi_size_high[bsize]; r++) { + index = 0; + row = r % MI_SIZE_64X64; + memset(&lfm->lfl_y_ver[row][col_start], level_vert_y, + sizeof(uint8_t) * mi_size_wide[bsize]); + memset(&lfm->lfl_y_hor[row][col_start], level_horz_y, + sizeof(uint8_t) * mi_size_wide[bsize]); + memset(&lfm->lfl_u[row][col_start], level_u, + sizeof(uint8_t) * mi_size_wide[bsize]); + memset(&lfm->lfl_v[row][col_start], level_v, + sizeof(uint8_t) * mi_size_wide[bsize]); + } +} +#endif + static void parse_decode_block(AV1Decoder *const pbi, ThreadData *const td, int mi_row, int mi_col, aom_reader *r, PARTITION_TYPE partition, BLOCK_SIZE bsize) { @@ -1353,14 +1572,46 @@ static void parse_decode_block(AV1Decoder *const pbi, ThreadData *const td, for (int idy = 0; idy < height; idy += bh) for (int idx = 0; idx < width; idx += bw) - read_tx_size_vartx(xd, mbmi, max_tx_size, 0, idy, idx, r); + read_tx_size_vartx(xd, mbmi, max_tx_size, 0, +#if LOOP_FILTER_BITMASK + cm, mi_row, mi_col, +#endif + idy, idx, r); } else { mbmi->tx_size = read_tx_size(cm, xd, inter_block_tx, !mbmi->skip, r); if (inter_block_tx) memset(mbmi->inter_tx_size, mbmi->tx_size, sizeof(mbmi->inter_tx_size)); - set_txfm_ctxs(mbmi->tx_size, xd->n8_w, xd->n8_h, + set_txfm_ctxs(mbmi->tx_size, xd->n4_w, xd->n4_h, mbmi->skip && is_inter_block(mbmi), xd); +#if LOOP_FILTER_BITMASK + const int w = mi_size_wide[bsize]; + const int h = mi_size_high[bsize]; + if (w <= mi_size_wide[BLOCK_64X64] && h <= mi_size_high[BLOCK_64X64]) { + store_bitmask_univariant_tx(cm, mi_row, mi_col, bsize, mbmi); + } else { + for (int row = 0; row < h; row += mi_size_high[BLOCK_64X64]) { + for (int col = 0; col < w; col += mi_size_wide[BLOCK_64X64]) { + store_bitmask_univariant_tx(cm, mi_row + row, mi_col + col, + BLOCK_64X64, mbmi); + } + } + } +#endif + } +#if LOOP_FILTER_BITMASK + const int w = mi_size_wide[bsize]; + const int h = mi_size_high[bsize]; + if (w <= mi_size_wide[BLOCK_64X64] && h <= mi_size_high[BLOCK_64X64]) { + store_bitmask_other_info(cm, mi_row, mi_col, bsize, mbmi); + } else { + for (int row = 0; row < h; row += mi_size_high[BLOCK_64X64]) { + for (int col = 0; col < w; col += mi_size_wide[BLOCK_64X64]) { + store_bitmask_other_info(cm, mi_row + row, mi_col + col, BLOCK_64X64, + mbmi); + } + } } +#endif if (cm->delta_q_present_flag) { for (int i = 0; i < MAX_SEGMENTS; i++) { @@ -1952,6 +2203,11 @@ static void setup_quantization(AV1_COMMON *const cm, cm->v_dc_delta_q = cm->u_dc_delta_q; cm->v_ac_delta_q = cm->u_ac_delta_q; } + } else { + cm->u_dc_delta_q = 0; + cm->u_ac_delta_q = 0; + cm->v_dc_delta_q = 0; + cm->v_ac_delta_q = 0; } cm->dequant_bit_depth = seq_params->bit_depth; cm->using_qmatrix = aom_rb_read_bit(rb); @@ -2082,29 +2338,9 @@ static void resize_context_buffers(AV1_COMMON *cm, int width, int height) { cm->cur_frame->height = cm->height; } -static void setup_frame_size(AV1_COMMON *cm, int frame_size_override_flag, - struct aom_read_bit_buffer *rb) { - const SequenceHeader *const seq_params = &cm->seq_params; - int width, height; +static void setup_buffer_pool(AV1_COMMON *cm) { BufferPool *const pool = cm->buffer_pool; - - if (frame_size_override_flag) { - int num_bits_width = seq_params->num_bits_width; - int num_bits_height = seq_params->num_bits_height; - av1_read_frame_size(rb, num_bits_width, num_bits_height, &width, &height); - if (width > seq_params->max_frame_width || - height > seq_params->max_frame_height) { - aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME, - "Frame dimensions are larger than the maximum values"); - } - } else { - width = seq_params->max_frame_width; - height = seq_params->max_frame_height; - } - - setup_superres(cm, rb, &width, &height); - resize_context_buffers(cm, width, height); - setup_render_size(cm, rb); + const SequenceHeader *const seq_params = &cm->seq_params; lock_buffer_pool(pool); if (aom_realloc_frame_buffer( @@ -2140,6 +2376,31 @@ static void setup_frame_size(AV1_COMMON *cm, int frame_size_override_flag, pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height; } +static void setup_frame_size(AV1_COMMON *cm, int frame_size_override_flag, + struct aom_read_bit_buffer *rb) { + const SequenceHeader *const seq_params = &cm->seq_params; + int width, height; + + if (frame_size_override_flag) { + int num_bits_width = seq_params->num_bits_width; + int num_bits_height = seq_params->num_bits_height; + av1_read_frame_size(rb, num_bits_width, num_bits_height, &width, &height); + if (width > seq_params->max_frame_width || + height > seq_params->max_frame_height) { + aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME, + "Frame dimensions are larger than the maximum values"); + } + } else { + width = seq_params->max_frame_width; + height = seq_params->max_frame_height; + } + + setup_superres(cm, rb, &width, &height); + resize_context_buffers(cm, width, height); + setup_render_size(cm, rb); + setup_buffer_pool(cm); +} + static void setup_sb_size(SequenceHeader *seq_params, struct aom_read_bit_buffer *rb) { set_sb_size(seq_params, aom_rb_read_bit(rb) ? BLOCK_128X128 : BLOCK_64X64); @@ -2158,7 +2419,6 @@ static void setup_frame_size_with_refs(AV1_COMMON *cm, int width, height; int found = 0; int has_valid_ref_frame = 0; - BufferPool *const pool = cm->buffer_pool; for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) { if (aom_rb_read_bit(rb)) { YV12_BUFFER_CONFIG *const buf = cm->frame_refs[i].buf; @@ -2208,39 +2468,7 @@ static void setup_frame_size_with_refs(AV1_COMMON *cm, aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME, "Referenced frame has incompatible color format"); } - - lock_buffer_pool(pool); - if (aom_realloc_frame_buffer( - get_frame_new_buffer(cm), cm->width, cm->height, - seq_params->subsampling_x, seq_params->subsampling_y, - seq_params->use_highbitdepth, AOM_BORDER_IN_PIXELS, - cm->byte_alignment, - &pool->frame_bufs[cm->new_fb_idx].raw_frame_buffer, pool->get_fb_cb, - pool->cb_priv)) { - unlock_buffer_pool(pool); - aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, - "Failed to allocate frame buffer"); - } - unlock_buffer_pool(pool); - - pool->frame_bufs[cm->new_fb_idx].buf.subsampling_x = - seq_params->subsampling_x; - pool->frame_bufs[cm->new_fb_idx].buf.subsampling_y = - seq_params->subsampling_y; - pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = - (unsigned int)seq_params->bit_depth; - pool->frame_bufs[cm->new_fb_idx].buf.color_primaries = - seq_params->color_primaries; - pool->frame_bufs[cm->new_fb_idx].buf.transfer_characteristics = - seq_params->transfer_characteristics; - pool->frame_bufs[cm->new_fb_idx].buf.matrix_coefficients = - seq_params->matrix_coefficients; - pool->frame_bufs[cm->new_fb_idx].buf.monochrome = seq_params->monochrome; - pool->frame_bufs[cm->new_fb_idx].buf.chroma_sample_position = - seq_params->chroma_sample_position; - pool->frame_bufs[cm->new_fb_idx].buf.color_range = seq_params->color_range; - pool->frame_bufs[cm->new_fb_idx].buf.render_width = cm->render_width; - pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height; + setup_buffer_pool(cm); } // Same function as av1_read_uniform but reading from uncompresses header wb @@ -2252,7 +2480,7 @@ static int rb_read_uniform(struct aom_read_bit_buffer *const rb, int n) { if (v < m) return v; else - return (v << 1) - m + aom_rb_read_literal(rb, 1); + return (v << 1) - m + aom_rb_read_bit(rb); } static void read_tile_info_max_tile(AV1_COMMON *const cm, @@ -2344,6 +2572,10 @@ static void read_tile_info(AV1Decoder *const pbi, // tile to use for cdf update cm->context_update_tile_id = aom_rb_read_literal(rb, cm->log2_tile_rows + cm->log2_tile_cols); + if (cm->context_update_tile_id >= cm->tile_rows * cm->tile_cols) { + aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME, + "Invalid context_update_tile_id"); + } // tile size magnitude pbi->tile_size_bytes = aom_rb_read_literal(rb, 2) + 1; } @@ -2746,31 +2978,13 @@ static INLINE void sync_write(AV1DecRowMTSync *const dec_row_mt_sync, int r, #endif // CONFIG_MULTITHREAD } -static INLINE int get_sb_rows_in_tile(AV1Decoder *pbi, TileInfo tile) { - AV1_COMMON *cm = &pbi->common; - int mi_rows_aligned_to_sb = ALIGN_POWER_OF_TWO( - tile.mi_row_end - tile.mi_row_start, cm->seq_params.mib_size_log2); - int sb_rows = mi_rows_aligned_to_sb >> cm->seq_params.mib_size_log2; - - return sb_rows; -} - -static INLINE int get_sb_cols_in_tile(AV1Decoder *pbi, TileInfo tile) { - AV1_COMMON *cm = &pbi->common; - int mi_cols_aligned_to_sb = ALIGN_POWER_OF_TWO( - tile.mi_col_end - tile.mi_col_start, cm->seq_params.mib_size_log2); - int sb_cols = mi_cols_aligned_to_sb >> cm->seq_params.mib_size_log2; - - return sb_cols; -} - static void decode_tile_sb_row(AV1Decoder *pbi, ThreadData *const td, TileInfo tile_info, const int mi_row) { AV1_COMMON *const cm = &pbi->common; const int num_planes = av1_num_planes(cm); TileDataDec *const tile_data = pbi->tile_data + tile_info.tile_row * cm->tile_cols + tile_info.tile_col; - const int sb_cols_in_tile = get_sb_cols_in_tile(pbi, tile_info); + const int sb_cols_in_tile = av1_get_sb_cols_in_tile(cm, tile_info); const int sb_row_in_tile = (mi_row - tile_info.mi_row_start) >> cm->seq_params.mib_size_log2; int sb_col_in_tile = 0; @@ -2792,15 +3006,11 @@ static void decode_tile_sb_row(AV1Decoder *pbi, ThreadData *const td, } static int check_trailing_bits_after_symbol_coder(aom_reader *r) { + if (aom_reader_has_overflowed(r)) return -1; + uint32_t nb_bits = aom_reader_tell(r); uint32_t nb_bytes = (nb_bits + 7) >> 3; - - const uint8_t *p_begin = aom_reader_find_begin(r); - const uint8_t *p_end = aom_reader_find_end(r); - - // It is legal to have no padding bytes (nb_bytes == p_end - p_begin). - if ((ptrdiff_t)nb_bytes > p_end - p_begin) return -1; - const uint8_t *p = p_begin + nb_bytes; + const uint8_t *p = aom_reader_find_begin(r) + nb_bytes; // aom_reader_tell() returns 1 for a newly initialized decoder, and the // return value only increases as values are decoded. So nb_bits > 0, and @@ -2810,6 +3020,7 @@ static int check_trailing_bits_after_symbol_coder(aom_reader *r) { if ((last_byte & (2 * pattern - 1)) != pattern) return -1; // Make sure that all padding bytes are zero as required by the spec. + const uint8_t *p_end = aom_reader_find_end(r); while (p < p_end) { if (*p != 0) return -1; p++; @@ -2863,6 +3074,11 @@ static void decode_tile(AV1Decoder *pbi, ThreadData *const td, int tile_row, // Bit-stream parsing and decoding of the superblock decode_partition(pbi, td, mi_row, mi_col, td->bit_reader, cm->seq_params.sb_size, 0x3); + + if (aom_reader_has_overflowed(td->bit_reader)) { + aom_merge_corrupted_flag(&td->xd.corrupted, 1); + return; + } } } @@ -2950,6 +3166,11 @@ static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data, td->xd.corrupted = 0; td->xd.mc_buf[0] = td->mc_buf[0]; td->xd.mc_buf[1] = td->mc_buf[1]; + td->xd.tmp_conv_dst = td->tmp_conv_dst; + for (int j = 0; j < 2; ++j) { + td->xd.tmp_obmc_bufs[j] = td->tmp_obmc_bufs[j]; + } + for (tile_row = tile_rows_start; tile_row < tile_rows_end; ++tile_row) { const int row = inv_row_order ? tile_rows - 1 - tile_row : tile_row; @@ -3236,6 +3457,7 @@ static int row_mt_worker_hook(void *arg1, void *arg2) { #endif frame_row_mt_info->row_mt_exit = 1; #if CONFIG_MULTITHREAD + pthread_cond_broadcast(pbi->row_mt_cond_); pthread_mutex_unlock(pbi->row_mt_mutex_); #endif return 0; @@ -3386,16 +3608,24 @@ static void alloc_dec_jobs(AV1DecTileMT *tile_mt_info, AV1_COMMON *cm, aom_malloc(sizeof(*tile_mt_info->job_queue) * num_tiles)); } -void av1_free_mc_tmp_buf(ThreadData *thread_data, int use_highbd) { +void av1_free_mc_tmp_buf(ThreadData *thread_data) { int ref; for (ref = 0; ref < 2; ref++) { - if (use_highbd) + if (thread_data->mc_buf_use_highbd) aom_free(CONVERT_TO_SHORTPTR(thread_data->mc_buf[ref])); else aom_free(thread_data->mc_buf[ref]); thread_data->mc_buf[ref] = NULL; } thread_data->mc_buf_size = 0; + thread_data->mc_buf_use_highbd = 0; + + aom_free(thread_data->tmp_conv_dst); + thread_data->tmp_conv_dst = NULL; + for (int i = 0; i < 2; ++i) { + aom_free(thread_data->tmp_obmc_bufs[i]); + thread_data->tmp_obmc_bufs[i] = NULL; + } } static void allocate_mc_tmp_buf(AV1_COMMON *const cm, ThreadData *thread_data, @@ -3411,6 +3641,17 @@ static void allocate_mc_tmp_buf(AV1_COMMON *const cm, ThreadData *thread_data, } } thread_data->mc_buf_size = buf_size; + thread_data->mc_buf_use_highbd = use_highbd; + + CHECK_MEM_ERROR(cm, thread_data->tmp_conv_dst, + aom_memalign(32, MAX_SB_SIZE * MAX_SB_SIZE * + sizeof(*thread_data->tmp_conv_dst))); + for (int i = 0; i < 2; ++i) { + CHECK_MEM_ERROR( + cm, thread_data->tmp_obmc_bufs[i], + aom_memalign(16, 2 * MAX_MB_PLANE * MAX_SB_SQUARE * + sizeof(*thread_data->tmp_obmc_bufs[i]))); + } } static void reset_dec_workers(AV1Decoder *pbi, AVxWorkerHook worker_hook, @@ -3425,6 +3666,10 @@ static void reset_dec_workers(AV1Decoder *pbi, AVxWorkerHook worker_hook, thread_data->td->xd.corrupted = 0; thread_data->td->xd.mc_buf[0] = thread_data->td->mc_buf[0]; thread_data->td->xd.mc_buf[1] = thread_data->td->mc_buf[1]; + thread_data->td->xd.tmp_conv_dst = thread_data->td->tmp_conv_dst; + for (int j = 0; j < 2; ++j) { + thread_data->td->xd.tmp_obmc_bufs[j] = thread_data->td->tmp_obmc_bufs[j]; + } winterface->sync(worker); worker->hook = worker_hook; @@ -3511,7 +3756,7 @@ static void decode_mt_init(AV1Decoder *pbi) { for (worker_idx = 0; worker_idx < pbi->max_threads - 1; ++worker_idx) { DecWorkerData *const thread_data = pbi->thread_data + worker_idx; if (thread_data->td->mc_buf_size != buf_size) { - av1_free_mc_tmp_buf(thread_data->td, use_highbd); + av1_free_mc_tmp_buf(thread_data->td); allocate_mc_tmp_buf(cm, thread_data->td, buf_size, use_highbd); } } @@ -3783,8 +4028,8 @@ static const uint8_t *decode_tiles_row_mt(AV1Decoder *pbi, const uint8_t *data, TileDataDec *tile_data = pbi->tile_data + row * cm->tile_cols + col; av1_tile_init(&tile_data->tile_info, cm, row, col); - max_sb_rows = - AOMMAX(max_sb_rows, get_sb_rows_in_tile(pbi, tile_data->tile_info)); + max_sb_rows = AOMMAX(max_sb_rows, + av1_get_sb_rows_in_tile(cm, tile_data->tile_info)); } } @@ -3905,6 +4150,8 @@ void av1_read_film_grain_params(AV1_COMMON *cm, if (!seq_params->monochrome) pars->chroma_scaling_from_luma = aom_rb_read_bit(rb); + else + pars->chroma_scaling_from_luma = 0; if (seq_params->monochrome || pars->chroma_scaling_from_luma || ((seq_params->subsampling_x == 1) && (seq_params->subsampling_y == 1) && @@ -4412,6 +4659,29 @@ static void show_existing_frame_reset(AV1Decoder *const pbi, *cm->fc = cm->frame_contexts[existing_frame_idx]; } +static INLINE void reset_frame_buffers(AV1_COMMON *cm) { + RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; + int i; + + memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); + memset(&cm->next_ref_frame_map, -1, sizeof(cm->next_ref_frame_map)); + + lock_buffer_pool(cm->buffer_pool); + for (i = 0; i < FRAME_BUFFERS; ++i) { + if (i != cm->new_fb_idx) { + frame_bufs[i].ref_count = 0; + cm->buffer_pool->release_fb_cb(cm->buffer_pool->cb_priv, + &frame_bufs[i].raw_frame_buffer); + } else { + assert(frame_bufs[i].ref_count == 1); + } + frame_bufs[i].cur_frame_offset = 0; + av1_zero(frame_bufs[i].ref_frame_offset); + } + av1_zero_unused_internal_frame_buffers(&cm->buffer_pool->int_frame_buffers); + unlock_buffer_pool(cm->buffer_pool); +} + // On success, returns 0. On failure, calls aom_internal_error and does not // return. static int read_uncompressed_header(AV1Decoder *pbi, @@ -4443,6 +4713,11 @@ static int read_uncompressed_header(AV1Decoder *pbi, cm->reset_decoder_state = 0; if (cm->show_existing_frame) { + if (pbi->sequence_header_changed) { + aom_internal_error( + &cm->error, AOM_CODEC_CORRUPT_FRAME, + "New sequence header starts with a show_existing_frame."); + } // Show an existing frame directly. const int existing_frame_idx = aom_rb_read_literal(rb, 3); const int frame_to_show = cm->ref_frame_map[existing_frame_idx]; @@ -4493,6 +4768,18 @@ static int read_uncompressed_header(AV1Decoder *pbi, } cm->frame_type = (FRAME_TYPE)aom_rb_read_literal(rb, 2); // 2 bits + if (pbi->sequence_header_changed) { + if (pbi->common.frame_type == KEY_FRAME) { + // This is the start of a new coded video sequence. + pbi->sequence_header_changed = 0; + pbi->decoding_first_frame = 1; + reset_frame_buffers(&pbi->common); + } else { + aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME, + "Sequence header has changed without a keyframe."); + } + } + cm->show_frame = aom_rb_read_bit(rb); if (seq_params->still_picture && (cm->frame_type != KEY_FRAME || !cm->show_frame)) { @@ -4582,8 +4869,7 @@ static int read_uncompressed_header(AV1Decoder *pbi, } } - frame_size_override_flag = - frame_is_sframe(cm) ? 1 : aom_rb_read_literal(rb, 1); + frame_size_override_flag = frame_is_sframe(cm) ? 1 : aom_rb_read_bit(rb); cm->frame_offset = aom_rb_read_literal(rb, seq_params->order_hint_bits_minus_1 + 1); @@ -5152,7 +5438,7 @@ static void setup_frame_info(AV1Decoder *pbi) { const int use_highbd = cm->seq_params.use_highbitdepth ? 1 : 0; const int buf_size = MC_TEMP_BUF_PELS << use_highbd; if (pbi->td.mc_buf_size != buf_size) { - av1_free_mc_tmp_buf(&pbi->td, use_highbd); + av1_free_mc_tmp_buf(&pbi->td); allocate_mc_tmp_buf(cm, &pbi->td, buf_size, use_highbd); } } @@ -5166,6 +5452,11 @@ void av1_decode_tg_tiles_and_wrapup(AV1Decoder *pbi, const uint8_t *data, const int tile_count_tg = end_tile - start_tile + 1; if (initialize_flag) setup_frame_info(pbi); + const int num_planes = av1_num_planes(cm); +#if LOOP_FILTER_BITMASK + av1_loop_filter_frame_init(cm, 0, num_planes); + av1_zero_array(cm->lf.lfm, cm->lf.lfm_num); +#endif if (pbi->max_threads > 1 && !(cm->large_scale_tile && !pbi->ext_tile_debug) && pbi->row_mt) @@ -5177,7 +5468,6 @@ void av1_decode_tg_tiles_and_wrapup(AV1Decoder *pbi, const uint8_t *data, else *p_data_end = decode_tiles(pbi, data, data_end, start_tile, end_tile); - const int num_planes = av1_num_planes(cm); // If the bit stream is monochrome, set the U and V buffers to a constant. if (num_planes < 3) { set_planes_to_neutral_grey(&cm->seq_params, xd->cur_buf, 1); @@ -5190,7 +5480,7 @@ void av1_decode_tg_tiles_and_wrapup(AV1Decoder *pbi, const uint8_t *data, if (!cm->allow_intrabc && !cm->single_tile_decoding) { if (cm->lf.filter_level[0] || cm->lf.filter_level[1]) { #if LOOP_FILTER_BITMASK - av1_loop_filter_frame(get_frame_new_buffer(cm), cm, &pbi->mb, 0, + av1_loop_filter_frame(get_frame_new_buffer(cm), cm, &pbi->mb, 1, 0, num_planes, 0); #else if (pbi->num_workers > 1) { @@ -5255,6 +5545,7 @@ void av1_decode_tg_tiles_and_wrapup(AV1Decoder *pbi, const uint8_t *data, if (!xd->corrupted) { if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) { + assert(cm->context_update_tile_id < pbi->allocated_tiles); *cm->fc = pbi->tile_data[cm->context_update_tile_id].tctx; av1_reset_cdf_symbol_counters(cm->fc); } |