diff options
Diffstat (limited to 'third_party/aom/av1/decoder')
-rw-r--r-- | third_party/aom/av1/decoder/decodeframe.c | 1726 | ||||
-rw-r--r-- | third_party/aom/av1/decoder/decodeframe.h | 29 | ||||
-rw-r--r-- | third_party/aom/av1/decoder/decodemv.c | 13 | ||||
-rw-r--r-- | third_party/aom/av1/decoder/decoder.c | 56 | ||||
-rw-r--r-- | third_party/aom/av1/decoder/decoder.h | 96 | ||||
-rw-r--r-- | third_party/aom/av1/decoder/decodetxb.c | 31 | ||||
-rw-r--r-- | third_party/aom/av1/decoder/decodetxb.h | 8 | ||||
-rw-r--r-- | third_party/aom/av1/decoder/dthread.c | 8 | ||||
-rw-r--r-- | third_party/aom/av1/decoder/dthread.h | 1 | ||||
-rw-r--r-- | third_party/aom/av1/decoder/obu.c | 252 |
10 files changed, 1607 insertions, 613 deletions
diff --git a/third_party/aom/av1/decoder/decodeframe.c b/third_party/aom/av1/decoder/decodeframe.c index e92c6b28c..6dbc4f3eb 100644 --- a/third_party/aom/av1/decoder/decodeframe.c +++ b/third_party/aom/av1/decoder/decodeframe.c @@ -84,15 +84,15 @@ int av1_check_trailing_bits(AV1Decoder *pbi, struct aom_read_bit_buffer *rb) { } // Use only_chroma = 1 to only set the chroma planes -static void set_planes_to_neutral_grey(AV1_COMMON *const cm, +static void set_planes_to_neutral_grey(const SequenceHeader *const seq_params, const YV12_BUFFER_CONFIG *const buf, int only_chroma) { - const int val = 1 << (cm->bit_depth - 1); + const int val = 1 << (seq_params->bit_depth - 1); for (int plane = only_chroma; plane < MAX_MB_PLANE; plane++) { const int is_uv = plane > 0; for (int row_idx = 0; row_idx < buf->crop_heights[is_uv]; row_idx++) { - if (cm->use_highbitdepth) { + if (seq_params->use_highbitdepth) { // TODO(yaowu): replace this with aom_memset16() for speed for (int col_idx = 0; col_idx < buf->crop_widths[is_uv]; col_idx++) { uint16_t *base = CONVERT_TO_SHORTPTR(buf->buffers[plane]); @@ -157,16 +157,18 @@ static void inverse_transform_block(MACROBLOCKD *xd, int plane, memset(dqcoeff, 0, (scan_line + 1) * sizeof(dqcoeff[0])); } -static void read_coeffs_tx_intra_block(AV1_COMMON *cm, MACROBLOCKD *const xd, - aom_reader *const r, int plane, int row, - int col, TX_SIZE tx_size) { +static void read_coeffs_tx_intra_block(const AV1_COMMON *const cm, + MACROBLOCKD *const xd, + aom_reader *const r, const int plane, + const int row, const int col, + const TX_SIZE tx_size) { MB_MODE_INFO *mbmi = xd->mi[0]; if (!mbmi->skip) { #if TXCOEFF_TIMER struct aom_usec_timer timer; aom_usec_timer_start(&timer); #endif - av1_read_coeffs_txb_facade(cm, xd, r, row, col, plane, tx_size); + av1_read_coeffs_txb_facade(cm, xd, r, plane, row, col, tx_size); #if TXCOEFF_TIMER aom_usec_timer_mark(&timer); const int64_t elapsed_time = aom_usec_timer_elapsed(&timer); @@ -176,11 +178,38 @@ static void read_coeffs_tx_intra_block(AV1_COMMON *cm, MACROBLOCKD *const xd, } } -static void predict_and_reconstruct_intra_block(AV1_COMMON *cm, - MACROBLOCKD *const xd, - aom_reader *const r, int plane, - int row, int col, - TX_SIZE tx_size) { +static void decode_block_void(const AV1_COMMON *const cm, MACROBLOCKD *const xd, + aom_reader *const r, const int plane, + const int row, const int col, + const TX_SIZE tx_size) { + (void)cm; + (void)xd; + (void)r; + (void)plane; + (void)row; + (void)col; + (void)tx_size; +} + +static void predict_inter_block_void(AV1_COMMON *const cm, + MACROBLOCKD *const xd, int mi_row, + int mi_col, BLOCK_SIZE bsize) { + (void)cm; + (void)xd; + (void)mi_row; + (void)mi_col; + (void)bsize; +} + +static void cfl_store_inter_block_void(AV1_COMMON *const cm, + MACROBLOCKD *const xd) { + (void)cm; + (void)xd; +} + +static void predict_and_reconstruct_intra_block( + const AV1_COMMON *const cm, MACROBLOCKD *const xd, aom_reader *const r, + const int plane, const int row, const int col, const TX_SIZE tx_size) { (void)r; MB_MODE_INFO *mbmi = xd->mi[0]; PLANE_TYPE plane_type = get_plane_type(plane); @@ -208,28 +237,33 @@ static void predict_and_reconstruct_intra_block(AV1_COMMON *cm, static void inverse_transform_inter_block(const AV1_COMMON *const cm, MACROBLOCKD *const xd, - aom_reader *const r, + aom_reader *const r, const int plane, const int blk_row, const int blk_col, - const int plane, const TX_SIZE tx_size) { (void)r; PLANE_TYPE plane_type = get_plane_type(plane); const struct macroblockd_plane *const pd = &xd->plane[plane]; - MB_MODE_INFO *mbmi = xd->mi[0]; // tx_type will be read out in av1_read_coeffs_txb_facade const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col, tx_size, cm->reduced_tx_set_used); - if (plane == 0) - update_txk_array(mbmi->txk_type, mbmi->sb_type, blk_row, blk_col, tx_size, - tx_type); - uint8_t *dst = &pd->dst .buf[(blk_row * pd->dst.stride + blk_col) << tx_size_wide_log2[0]]; inverse_transform_block(xd, plane, tx_type, tx_size, dst, pd->dst.stride, cm->reduced_tx_set_used); +#if CONFIG_MISMATCH_DEBUG + int pixel_c, pixel_r; + BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; + int blk_w = block_size_wide[bsize]; + int blk_h = block_size_high[bsize]; + mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, blk_col, blk_row, + pd->subsampling_x, pd->subsampling_y); + mismatch_check_block_tx(dst, pd->dst.stride, cm->frame_offset, plane, pixel_c, + pixel_r, blk_w, blk_h, + xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH); +#endif } static void set_cb_buffer_offsets(MACROBLOCKD *const xd, TX_SIZE tx_size, @@ -239,11 +273,12 @@ static void set_cb_buffer_offsets(MACROBLOCKD *const xd, TX_SIZE tx_size, xd->cb_offset[plane] / (TX_SIZE_W_MIN * TX_SIZE_H_MIN); } -static void decode_reconstruct_tx(AV1_COMMON *cm, MACROBLOCKD *const xd, +static void decode_reconstruct_tx(AV1_COMMON *cm, ThreadData *const td, aom_reader *r, MB_MODE_INFO *const mbmi, int plane, BLOCK_SIZE plane_bsize, int blk_row, int blk_col, int block, TX_SIZE tx_size, int *eob_total) { + MACROBLOCKD *const xd = &td->xd; const struct macroblockd_plane *const pd = &xd->plane[plane]; const TX_SIZE plane_tx_size = plane ? av1_get_max_uv_txsize(mbmi->sb_type, pd->subsampling_x, @@ -257,30 +292,11 @@ static void decode_reconstruct_tx(AV1_COMMON *cm, MACROBLOCKD *const xd, if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return; if (tx_size == plane_tx_size || plane) { -#if TXCOEFF_TIMER - struct aom_usec_timer timer; - aom_usec_timer_start(&timer); -#endif - av1_read_coeffs_txb_facade(cm, xd, r, blk_row, blk_col, plane, tx_size); -#if TXCOEFF_TIMER - aom_usec_timer_mark(&timer); - const int64_t elapsed_time = aom_usec_timer_elapsed(&timer); - cm->txcoeff_timer += elapsed_time; - ++cm->txb_count; -#endif - inverse_transform_inter_block(cm, xd, r, blk_row, blk_col, plane, tx_size); + td->read_coeffs_tx_inter_block_visit(cm, xd, r, plane, blk_row, blk_col, + tx_size); -#if CONFIG_MISMATCH_DEBUG - int pixel_c, pixel_r; - BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; - int blk_w = block_size_wide[bsize]; - int blk_h = block_size_high[bsize]; - mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, blk_col, blk_row, - pd->subsampling_x, pd->subsampling_y); - mismatch_check_block_tx(dst, pd->dst.stride, cm->frame_offset, plane, - pixel_c, pixel_r, blk_w, blk_h, - xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH); -#endif + td->inverse_tx_inter_block_visit(cm, xd, r, plane, blk_row, blk_col, + tx_size); eob_info *eob_data = pd->eob_data + xd->txb_offset[plane]; *eob_total += eob_data->eob; set_cb_buffer_offsets(xd, tx_size, plane); @@ -301,7 +317,7 @@ static void decode_reconstruct_tx(AV1_COMMON *cm, MACROBLOCKD *const xd, if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue; - decode_reconstruct_tx(cm, xd, r, mbmi, plane, plane_bsize, offsetr, + decode_reconstruct_tx(cm, td, r, mbmi, plane, plane_bsize, offsetr, offsetc, block, sub_txs, eob_total); block += sub_step; } @@ -352,6 +368,7 @@ static void decode_mbmi_block(AV1Decoder *const pbi, MACROBLOCKD *const xd, int mi_row, int mi_col, aom_reader *r, PARTITION_TYPE partition, BLOCK_SIZE bsize) { AV1_COMMON *const cm = &pbi->common; + const SequenceHeader *const seq_params = &cm->seq_params; const int bw = mi_size_wide[bsize]; const int bh = mi_size_high[bsize]; const int x_mis = AOMMIN(bw, cm->mi_cols - mi_col); @@ -363,9 +380,11 @@ static void decode_mbmi_block(AV1Decoder *const pbi, MACROBLOCKD *const xd, set_offsets(cm, xd, bsize, mi_row, mi_col, bw, bh, x_mis, y_mis); xd->mi[0]->partition = partition; av1_read_mode_info(pbi, xd, mi_row, mi_col, r, x_mis, y_mis); - if (bsize >= BLOCK_8X8 && (cm->subsampling_x || cm->subsampling_y)) { + if (bsize >= BLOCK_8X8 && + (seq_params->subsampling_x || seq_params->subsampling_y)) { const BLOCK_SIZE uv_subsize = - ss_size_lookup[bsize][cm->subsampling_x][cm->subsampling_y]; + ss_size_lookup[bsize][seq_params->subsampling_x] + [seq_params->subsampling_y]; if (uv_subsize == BLOCK_INVALID) aom_internal_error(xd->error_info, AOM_CODEC_CORRUPT_FRAME, "Invalid block size."); @@ -843,8 +862,8 @@ static void dec_build_inter_predictors_sby(const AV1_COMMON *cm, BUFFER_SET default_ctx = { { xd->plane[0].dst.buf, NULL, NULL }, { xd->plane[0].dst.stride, 0, 0 } }; if (!ctx) ctx = &default_ctx; - av1_build_interintra_predictors_sby(cm, xd, xd->plane[0].dst.buf, - xd->plane[0].dst.stride, ctx, bsize); + av1_build_interintra_predictors_sbp(cm, xd, xd->plane[0].dst.buf, + xd->plane[0].dst.stride, ctx, 0, bsize); } } @@ -1052,6 +1071,20 @@ static void predict_inter_block(AV1_COMMON *const cm, MACROBLOCKD *const xd, dec_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize); if (mbmi->motion_mode == OBMC_CAUSAL) dec_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col); +#if CONFIG_MISMATCH_DEBUG + for (int plane = 0; plane < num_planes; ++plane) { + const struct macroblockd_plane *pd = &xd->plane[plane]; + int pixel_c, pixel_r; + mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, 0, 0, pd->subsampling_x, + pd->subsampling_y); + if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x, + pd->subsampling_y)) + continue; + mismatch_check_block_pre(pd->dst.buf, pd->dst.stride, cm->frame_offset, + plane, pixel_c, pixel_r, pd->width, pd->height, + xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH); + } +#endif } static void set_color_index_map_offset(MACROBLOCKD *const xd, int plane, @@ -1064,42 +1097,19 @@ static void set_color_index_map_offset(MACROBLOCKD *const xd, int plane, xd->color_index_map_offset[plane] += params.plane_width * params.plane_height; } -static void decode_token_and_recon_block(AV1Decoder *const pbi, - MACROBLOCKD *const xd, int mi_row, - int mi_col, aom_reader *r, - BLOCK_SIZE bsize) { +static void decode_token_recon_block(AV1Decoder *const pbi, + ThreadData *const td, int mi_row, + int mi_col, aom_reader *r, + BLOCK_SIZE bsize) { AV1_COMMON *const cm = &pbi->common; + MACROBLOCKD *const xd = &td->xd; const int num_planes = av1_num_planes(cm); - const int bw = mi_size_wide[bsize]; - const int bh = mi_size_high[bsize]; - const int x_mis = AOMMIN(bw, cm->mi_cols - mi_col); - const int y_mis = AOMMIN(bh, cm->mi_rows - mi_row); - set_offsets(cm, xd, bsize, mi_row, mi_col, bw, bh, x_mis, y_mis); MB_MODE_INFO *mbmi = xd->mi[0]; CFL_CTX *const cfl = &xd->cfl; cfl->is_chroma_reference = is_chroma_reference( mi_row, mi_col, bsize, cfl->subsampling_x, cfl->subsampling_y); - if (cm->delta_q_present_flag) { - for (int i = 0; i < MAX_SEGMENTS; i++) { - const int current_qindex = - av1_get_qindex(&cm->seg, i, xd->current_qindex); - for (int j = 0; j < num_planes; ++j) { - const int dc_delta_q = - j == 0 ? cm->y_dc_delta_q - : (j == 1 ? cm->u_dc_delta_q : cm->v_dc_delta_q); - const int ac_delta_q = - j == 0 ? 0 : (j == 1 ? cm->u_ac_delta_q : cm->v_ac_delta_q); - xd->plane[j].seg_dequant_QTX[i][0] = - av1_dc_quant_QTX(current_qindex, dc_delta_q, cm->bit_depth); - xd->plane[j].seg_dequant_QTX[i][1] = - av1_ac_quant_QTX(current_qindex, ac_delta_q, cm->bit_depth); - } - } - } - if (mbmi->skip) av1_reset_skip_context(xd, mi_row, mi_col, bsize, num_planes); - if (!is_inter_block(mbmi)) { int row, col; assert(bsize == get_plane_block_size(bsize, xd->plane[0].subsampling_x, @@ -1135,10 +1145,10 @@ static void decode_token_and_recon_block(AV1Decoder *const pbi, blk_row += stepr) { for (int blk_col = col >> pd->subsampling_x; blk_col < unit_width; blk_col += stepc) { - read_coeffs_tx_intra_block(cm, xd, r, plane, blk_row, blk_col, - tx_size); - predict_and_reconstruct_intra_block(cm, xd, r, plane, blk_row, - blk_col, tx_size); + td->read_coeffs_tx_intra_block_visit(cm, xd, r, plane, blk_row, + blk_col, tx_size); + td->predict_and_recon_intra_block_visit(cm, xd, r, plane, blk_row, + blk_col, tx_size); set_cb_buffer_offsets(xd, tx_size, plane); } } @@ -1146,22 +1156,7 @@ static void decode_token_and_recon_block(AV1Decoder *const pbi, } } } else { - predict_inter_block(cm, xd, mi_row, mi_col, bsize); -#if CONFIG_MISMATCH_DEBUG - for (int plane = 0; plane < num_planes; ++plane) { - const struct macroblockd_plane *pd = &xd->plane[plane]; - int pixel_c, pixel_r; - mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, 0, 0, - pd->subsampling_x, pd->subsampling_y); - if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x, - pd->subsampling_y)) - continue; - mismatch_check_block_pre(pd->dst.buf, pd->dst.stride, cm->frame_offset, - plane, pixel_c, pixel_r, pd->width, pd->height, - xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH); - } -#endif - + td->predict_inter_block_visit(cm, xd, mi_row, mi_col, bsize); // Reconstruction if (!mbmi->skip) { int eobtotal = 0; @@ -1213,7 +1208,7 @@ static void decode_token_and_recon_block(AV1Decoder *const pbi, blk_row += bh_var_tx) { for (blk_col = col >> pd->subsampling_x; blk_col < unit_width; blk_col += bw_var_tx) { - decode_reconstruct_tx(cm, xd, r, mbmi, plane, plane_bsize, + decode_reconstruct_tx(cm, td, r, mbmi, plane, plane_bsize, blk_row, blk_col, block, max_tx_size, &eobtotal); block += step; @@ -1223,14 +1218,11 @@ static void decode_token_and_recon_block(AV1Decoder *const pbi, } } } - cfl_store_inter_block(cm, xd); + td->cfl_store_inter_block_visit(cm, xd); } av1_visit_palette(pbi, xd, mi_row, mi_col, r, bsize, set_color_index_map_offset); - - int reader_corrupted_flag = aom_reader_has_error(r); - aom_merge_corrupted_flag(&xd->corrupted, reader_corrupted_flag); } static void read_tx_size_vartx(MACROBLOCKD *xd, MB_MODE_INFO *mbmi, @@ -1338,15 +1330,17 @@ static TX_SIZE read_tx_size(AV1_COMMON *cm, MACROBLOCKD *xd, int is_inter, } } -static void decode_block(AV1Decoder *const pbi, MACROBLOCKD *const xd, - int mi_row, int mi_col, aom_reader *r, - PARTITION_TYPE partition, BLOCK_SIZE bsize) { +static void parse_decode_block(AV1Decoder *const pbi, ThreadData *const td, + int mi_row, int mi_col, aom_reader *r, + PARTITION_TYPE partition, BLOCK_SIZE bsize) { + MACROBLOCKD *const xd = &td->xd; decode_mbmi_block(pbi, xd, mi_row, mi_col, r, partition, bsize); av1_visit_palette(pbi, xd, mi_row, mi_col, r, bsize, av1_decode_palette_tokens); AV1_COMMON *cm = &pbi->common; + const int num_planes = av1_num_planes(cm); MB_MODE_INFO *mbmi = xd->mi[0]; int inter_block_tx = is_inter_block(mbmi) || is_intrabc_block(mbmi); if (cm->tx_mode == TX_MODE_SELECT && block_signals_txsize(bsize) && @@ -1368,7 +1362,63 @@ static void decode_block(AV1Decoder *const pbi, MACROBLOCKD *const xd, mbmi->skip && is_inter_block(mbmi), xd); } - decode_token_and_recon_block(pbi, xd, mi_row, mi_col, r, bsize); + if (cm->delta_q_present_flag) { + for (int i = 0; i < MAX_SEGMENTS; i++) { + const int current_qindex = + av1_get_qindex(&cm->seg, i, xd->current_qindex); + for (int j = 0; j < num_planes; ++j) { + const int dc_delta_q = + j == 0 ? cm->y_dc_delta_q + : (j == 1 ? cm->u_dc_delta_q : cm->v_dc_delta_q); + const int ac_delta_q = + j == 0 ? 0 : (j == 1 ? cm->u_ac_delta_q : cm->v_ac_delta_q); + xd->plane[j].seg_dequant_QTX[i][0] = av1_dc_quant_QTX( + current_qindex, dc_delta_q, cm->seq_params.bit_depth); + xd->plane[j].seg_dequant_QTX[i][1] = av1_ac_quant_QTX( + current_qindex, ac_delta_q, cm->seq_params.bit_depth); + } + } + } + if (mbmi->skip) av1_reset_skip_context(xd, mi_row, mi_col, bsize, num_planes); + + decode_token_recon_block(pbi, td, mi_row, mi_col, r, bsize); + + int reader_corrupted_flag = aom_reader_has_error(r); + aom_merge_corrupted_flag(&xd->corrupted, reader_corrupted_flag); +} + +static void set_offsets_for_pred_and_recon(AV1Decoder *const pbi, + ThreadData *const td, int mi_row, + int mi_col, BLOCK_SIZE bsize) { + AV1_COMMON *const cm = &pbi->common; + MACROBLOCKD *const xd = &td->xd; + const int bw = mi_size_wide[bsize]; + const int bh = mi_size_high[bsize]; + const int num_planes = av1_num_planes(cm); + + const int offset = mi_row * cm->mi_stride + mi_col; + const TileInfo *const tile = &xd->tile; + + xd->mi = cm->mi_grid_visible + offset; + xd->cfl.mi_row = mi_row; + xd->cfl.mi_col = mi_col; + + set_plane_n4(xd, bw, bh, num_planes); + + // Distance of Mb to the various image edges. These are specified to 8th pel + // as they are always compared to values that are in 1/8th pel units + set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); + + av1_setup_dst_planes(xd->plane, bsize, get_frame_new_buffer(cm), mi_row, + mi_col, 0, num_planes); +} + +static void decode_block(AV1Decoder *const pbi, ThreadData *const td, + int mi_row, int mi_col, aom_reader *r, + PARTITION_TYPE partition, BLOCK_SIZE bsize) { + (void)partition; + set_offsets_for_pred_and_recon(pbi, td, mi_row, mi_col, bsize); + decode_token_recon_block(pbi, td, mi_row, mi_col, r, bsize); } static PARTITION_TYPE read_partition(MACROBLOCKD *xd, int mi_row, int mi_col, @@ -1401,10 +1451,11 @@ static PARTITION_TYPE read_partition(MACROBLOCKD *xd, int mi_row, int mi_col, } // TODO(slavarnway): eliminate bsize and subsize in future commits -static void decode_partition(AV1Decoder *const pbi, MACROBLOCKD *const xd, +static void decode_partition(AV1Decoder *const pbi, ThreadData *const td, int mi_row, int mi_col, aom_reader *r, - BLOCK_SIZE bsize) { + BLOCK_SIZE bsize, int parse_decode_flag) { AV1_COMMON *const cm = &pbi->common; + MACROBLOCKD *const xd = &td->xd; const int bw = mi_size_wide[bsize]; const int hbs = bw >> 1; PARTITION_TYPE partition; @@ -1416,25 +1467,36 @@ static void decode_partition(AV1Decoder *const pbi, MACROBLOCKD *const xd, if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - const int num_planes = av1_num_planes(cm); - for (int plane = 0; plane < num_planes; ++plane) { - int rcol0, rcol1, rrow0, rrow1, tile_tl_idx; - if (av1_loop_restoration_corners_in_sb(cm, plane, mi_row, mi_col, bsize, - &rcol0, &rcol1, &rrow0, &rrow1, - &tile_tl_idx)) { - const int rstride = cm->rst_info[plane].horz_units_per_tile; - for (int rrow = rrow0; rrow < rrow1; ++rrow) { - for (int rcol = rcol0; rcol < rcol1; ++rcol) { - const int runit_idx = tile_tl_idx + rcol + rrow * rstride; - loop_restoration_read_sb_coeffs(cm, xd, r, plane, runit_idx); + // parse_decode_flag takes the following values : + // 01 - do parse only + // 10 - do decode only + // 11 - do parse and decode + static const block_visitor_fn_t block_visit[4] = { + NULL, parse_decode_block, decode_block, parse_decode_block + }; + + if (parse_decode_flag & 1) { + const int num_planes = av1_num_planes(cm); + for (int plane = 0; plane < num_planes; ++plane) { + int rcol0, rcol1, rrow0, rrow1; + if (av1_loop_restoration_corners_in_sb(cm, plane, mi_row, mi_col, bsize, + &rcol0, &rcol1, &rrow0, &rrow1)) { + const int rstride = cm->rst_info[plane].horz_units_per_tile; + for (int rrow = rrow0; rrow < rrow1; ++rrow) { + for (int rcol = rcol0; rcol < rcol1; ++rcol) { + const int runit_idx = rcol + rrow * rstride; + loop_restoration_read_sb_coeffs(cm, xd, r, plane, runit_idx); + } } } } - } - partition = (bsize < BLOCK_8X8) ? PARTITION_NONE - : read_partition(xd, mi_row, mi_col, r, - has_rows, has_cols, bsize); + partition = (bsize < BLOCK_8X8) ? PARTITION_NONE + : read_partition(xd, mi_row, mi_col, r, + has_rows, has_cols, bsize); + } else { + partition = get_partition(cm, mi_row, mi_col, bsize); + } subsize = get_partition_subsize(bsize, partition); // Check the bitstream is conformant: if there is subsampling on the @@ -1442,18 +1504,19 @@ static void decode_partition(AV1Decoder *const pbi, MACROBLOCKD *const xd, const struct macroblockd_plane *const pd_u = &xd->plane[1]; if (get_plane_block_size(subsize, pd_u->subsampling_x, pd_u->subsampling_y) == BLOCK_INVALID) { - aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME, + aom_internal_error(xd->error_info, AOM_CODEC_CORRUPT_FRAME, "Block size %dx%d invalid with this subsampling mode", block_size_wide[subsize], block_size_high[subsize]); } #define DEC_BLOCK_STX_ARG #define DEC_BLOCK_EPT_ARG partition, -#define DEC_BLOCK(db_r, db_c, db_subsize) \ - decode_block(pbi, xd, DEC_BLOCK_STX_ARG(db_r), (db_c), r, \ - DEC_BLOCK_EPT_ARG(db_subsize)) -#define DEC_PARTITION(db_r, db_c, db_subsize) \ - decode_partition(pbi, xd, DEC_BLOCK_STX_ARG(db_r), (db_c), r, (db_subsize)) +#define DEC_BLOCK(db_r, db_c, db_subsize) \ + block_visit[parse_decode_flag](pbi, td, DEC_BLOCK_STX_ARG(db_r), (db_c), r, \ + DEC_BLOCK_EPT_ARG(db_subsize)) +#define DEC_PARTITION(db_r, db_c, db_subsize) \ + decode_partition(pbi, td, DEC_BLOCK_STX_ARG(db_r), (db_c), r, (db_subsize), \ + parse_decode_flag) switch (partition) { case PARTITION_NONE: DEC_BLOCK(mi_row, mi_col, subsize); break; @@ -1513,7 +1576,8 @@ static void decode_partition(AV1Decoder *const pbi, MACROBLOCKD *const xd, #undef DEC_BLOCK_EPT_ARG #undef DEC_BLOCK_STX_ARG - update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize, partition); + if (parse_decode_flag & 1) + update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize, partition); } static void setup_bool_decoder(const uint8_t *data, const uint8_t *data_end, @@ -1650,7 +1714,7 @@ static void decode_restoration_mode(AV1_COMMON *cm, } if (num_planes > 1) { - int s = AOMMIN(cm->subsampling_x, cm->subsampling_y); + int s = AOMMIN(cm->seq_params.subsampling_x, cm->seq_params.subsampling_y); if (s && !chroma_none) { cm->rst_info[1].restoration_unit_size = cm->rst_info[0].restoration_unit_size >> (aom_rb_read_bit(rb) * s); @@ -1872,12 +1936,13 @@ static INLINE int read_delta_q(struct aom_read_bit_buffer *rb) { static void setup_quantization(AV1_COMMON *const cm, struct aom_read_bit_buffer *rb) { + const SequenceHeader *const seq_params = &cm->seq_params; const int num_planes = av1_num_planes(cm); cm->base_qindex = aom_rb_read_literal(rb, QINDEX_BITS); cm->y_dc_delta_q = read_delta_q(rb); if (num_planes > 1) { int diff_uv_delta = 0; - if (cm->separate_uv_delta_q) diff_uv_delta = aom_rb_read_bit(rb); + if (seq_params->separate_uv_delta_q) diff_uv_delta = aom_rb_read_bit(rb); cm->u_dc_delta_q = read_delta_q(rb); cm->u_ac_delta_q = read_delta_q(rb); if (diff_uv_delta) { @@ -1888,12 +1953,12 @@ static void setup_quantization(AV1_COMMON *const cm, cm->v_ac_delta_q = cm->u_ac_delta_q; } } - cm->dequant_bit_depth = cm->bit_depth; + cm->dequant_bit_depth = seq_params->bit_depth; cm->using_qmatrix = aom_rb_read_bit(rb); if (cm->using_qmatrix) { cm->qm_y = aom_rb_read_literal(rb, QM_LEVEL_BITS); cm->qm_u = aom_rb_read_literal(rb, QM_LEVEL_BITS); - if (!cm->separate_uv_delta_q) + if (!seq_params->separate_uv_delta_q) cm->qm_v = cm->qm_u; else cm->qm_v = aom_rb_read_literal(rb, QM_LEVEL_BITS); @@ -1906,6 +1971,7 @@ static void setup_quantization(AV1_COMMON *const cm, // Build y/uv dequant values based on segmentation. static void setup_segmentation_dequant(AV1_COMMON *const cm) { + const int bit_depth = cm->seq_params.bit_depth; const int using_qm = cm->using_qmatrix; // When segmentation is disabled, only the first value is used. The // remaining are don't cares. @@ -1913,16 +1979,16 @@ static void setup_segmentation_dequant(AV1_COMMON *const cm) { for (int i = 0; i < max_segments; ++i) { const int qindex = av1_get_qindex(&cm->seg, i, cm->base_qindex); cm->y_dequant_QTX[i][0] = - av1_dc_quant_QTX(qindex, cm->y_dc_delta_q, cm->bit_depth); - cm->y_dequant_QTX[i][1] = av1_ac_quant_QTX(qindex, 0, cm->bit_depth); + av1_dc_quant_QTX(qindex, cm->y_dc_delta_q, bit_depth); + cm->y_dequant_QTX[i][1] = av1_ac_quant_QTX(qindex, 0, bit_depth); cm->u_dequant_QTX[i][0] = - av1_dc_quant_QTX(qindex, cm->u_dc_delta_q, cm->bit_depth); + av1_dc_quant_QTX(qindex, cm->u_dc_delta_q, bit_depth); cm->u_dequant_QTX[i][1] = - av1_ac_quant_QTX(qindex, cm->u_ac_delta_q, cm->bit_depth); + av1_ac_quant_QTX(qindex, cm->u_ac_delta_q, bit_depth); cm->v_dequant_QTX[i][0] = - av1_dc_quant_QTX(qindex, cm->v_dc_delta_q, cm->bit_depth); + av1_dc_quant_QTX(qindex, cm->v_dc_delta_q, bit_depth); cm->v_dequant_QTX[i][1] = - av1_ac_quant_QTX(qindex, cm->v_ac_delta_q, cm->bit_depth); + av1_ac_quant_QTX(qindex, cm->v_ac_delta_q, bit_depth); const int lossless = qindex == 0 && cm->y_dc_delta_q == 0 && cm->u_dc_delta_q == 0 && cm->u_ac_delta_q == 0 && cm->v_dc_delta_q == 0 && cm->v_ac_delta_q == 0; @@ -1994,9 +2060,15 @@ static void resize_context_buffers(AV1_COMMON *cm, int width, int height) { // Allocations in av1_alloc_context_buffers() depend on individual // dimensions as well as the overall size. if (new_mi_cols > cm->mi_cols || new_mi_rows > cm->mi_rows) { - if (av1_alloc_context_buffers(cm, width, height)) + if (av1_alloc_context_buffers(cm, width, height)) { + // The cm->mi_* values have been cleared and any existing context + // buffers have been freed. Clear cm->width and cm->height to be + // consistent and to force a realloc next time. + cm->width = 0; + cm->height = 0; aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, "Failed to allocate context buffers"); + } } else { av1_set_mb_mi(cm, width, height); } @@ -2012,21 +2084,22 @@ static void resize_context_buffers(AV1_COMMON *cm, int width, int height) { static void setup_frame_size(AV1_COMMON *cm, int frame_size_override_flag, struct aom_read_bit_buffer *rb) { + const SequenceHeader *const seq_params = &cm->seq_params; int width, height; BufferPool *const pool = cm->buffer_pool; if (frame_size_override_flag) { - int num_bits_width = cm->seq_params.num_bits_width; - int num_bits_height = cm->seq_params.num_bits_height; + int num_bits_width = seq_params->num_bits_width; + int num_bits_height = seq_params->num_bits_height; av1_read_frame_size(rb, num_bits_width, num_bits_height, &width, &height); - if (width > cm->seq_params.max_frame_width || - height > cm->seq_params.max_frame_height) { + if (width > seq_params->max_frame_width || + height > seq_params->max_frame_height) { aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME, "Frame dimensions are larger than the maximum values"); } } else { - width = cm->seq_params.max_frame_width; - height = cm->seq_params.max_frame_height; + width = seq_params->max_frame_width; + height = seq_params->max_frame_height; } setup_superres(cm, rb, &width, &height); @@ -2035,8 +2108,9 @@ static void setup_frame_size(AV1_COMMON *cm, int frame_size_override_flag, lock_buffer_pool(pool); if (aom_realloc_frame_buffer( - get_frame_new_buffer(cm), cm->width, cm->height, cm->subsampling_x, - cm->subsampling_y, cm->use_highbitdepth, AOM_BORDER_IN_PIXELS, + get_frame_new_buffer(cm), cm->width, cm->height, + seq_params->subsampling_x, seq_params->subsampling_y, + seq_params->use_highbitdepth, AOM_BORDER_IN_PIXELS, cm->byte_alignment, &pool->frame_bufs[cm->new_fb_idx].raw_frame_buffer, pool->get_fb_cb, pool->cb_priv)) { @@ -2046,18 +2120,22 @@ static void setup_frame_size(AV1_COMMON *cm, int frame_size_override_flag, } unlock_buffer_pool(pool); - pool->frame_bufs[cm->new_fb_idx].buf.subsampling_x = cm->subsampling_x; - pool->frame_bufs[cm->new_fb_idx].buf.subsampling_y = cm->subsampling_y; - pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth; - pool->frame_bufs[cm->new_fb_idx].buf.color_primaries = cm->color_primaries; + pool->frame_bufs[cm->new_fb_idx].buf.subsampling_x = + seq_params->subsampling_x; + pool->frame_bufs[cm->new_fb_idx].buf.subsampling_y = + seq_params->subsampling_y; + pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = + (unsigned int)seq_params->bit_depth; + pool->frame_bufs[cm->new_fb_idx].buf.color_primaries = + seq_params->color_primaries; pool->frame_bufs[cm->new_fb_idx].buf.transfer_characteristics = - cm->transfer_characteristics; + seq_params->transfer_characteristics; pool->frame_bufs[cm->new_fb_idx].buf.matrix_coefficients = - cm->matrix_coefficients; - pool->frame_bufs[cm->new_fb_idx].buf.monochrome = cm->seq_params.monochrome; + seq_params->matrix_coefficients; + pool->frame_bufs[cm->new_fb_idx].buf.monochrome = seq_params->monochrome; pool->frame_bufs[cm->new_fb_idx].buf.chroma_sample_position = - cm->chroma_sample_position; - pool->frame_bufs[cm->new_fb_idx].buf.color_range = cm->color_range; + seq_params->chroma_sample_position; + pool->frame_bufs[cm->new_fb_idx].buf.color_range = seq_params->color_range; pool->frame_bufs[cm->new_fb_idx].buf.render_width = cm->render_width; pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height; } @@ -2095,9 +2173,10 @@ static void setup_frame_size_with_refs(AV1_COMMON *cm, } } + const SequenceHeader *const seq_params = &cm->seq_params; if (!found) { - int num_bits_width = cm->seq_params.num_bits_width; - int num_bits_height = cm->seq_params.num_bits_height; + int num_bits_width = seq_params->num_bits_width; + int num_bits_height = seq_params->num_bits_height; av1_read_frame_size(rb, num_bits_width, num_bits_height, &width, &height); setup_superres(cm, rb, &width, &height); @@ -2122,18 +2201,19 @@ static void setup_frame_size_with_refs(AV1_COMMON *cm, "Referenced frame has invalid size"); for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) { RefBuffer *const ref_frame = &cm->frame_refs[i]; - if (!valid_ref_frame_img_fmt(ref_frame->buf->bit_depth, - ref_frame->buf->subsampling_x, - ref_frame->buf->subsampling_y, cm->bit_depth, - cm->subsampling_x, cm->subsampling_y)) + if (!valid_ref_frame_img_fmt( + ref_frame->buf->bit_depth, ref_frame->buf->subsampling_x, + ref_frame->buf->subsampling_y, seq_params->bit_depth, + seq_params->subsampling_x, seq_params->subsampling_y)) aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME, "Referenced frame has incompatible color format"); } lock_buffer_pool(pool); if (aom_realloc_frame_buffer( - get_frame_new_buffer(cm), cm->width, cm->height, cm->subsampling_x, - cm->subsampling_y, cm->use_highbitdepth, AOM_BORDER_IN_PIXELS, + get_frame_new_buffer(cm), cm->width, cm->height, + seq_params->subsampling_x, seq_params->subsampling_y, + seq_params->use_highbitdepth, AOM_BORDER_IN_PIXELS, cm->byte_alignment, &pool->frame_bufs[cm->new_fb_idx].raw_frame_buffer, pool->get_fb_cb, pool->cb_priv)) { @@ -2143,18 +2223,22 @@ static void setup_frame_size_with_refs(AV1_COMMON *cm, } unlock_buffer_pool(pool); - pool->frame_bufs[cm->new_fb_idx].buf.subsampling_x = cm->subsampling_x; - pool->frame_bufs[cm->new_fb_idx].buf.subsampling_y = cm->subsampling_y; - pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth; - pool->frame_bufs[cm->new_fb_idx].buf.color_primaries = cm->color_primaries; + pool->frame_bufs[cm->new_fb_idx].buf.subsampling_x = + seq_params->subsampling_x; + pool->frame_bufs[cm->new_fb_idx].buf.subsampling_y = + seq_params->subsampling_y; + pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = + (unsigned int)seq_params->bit_depth; + pool->frame_bufs[cm->new_fb_idx].buf.color_primaries = + seq_params->color_primaries; pool->frame_bufs[cm->new_fb_idx].buf.transfer_characteristics = - cm->transfer_characteristics; + seq_params->transfer_characteristics; pool->frame_bufs[cm->new_fb_idx].buf.matrix_coefficients = - cm->matrix_coefficients; - pool->frame_bufs[cm->new_fb_idx].buf.monochrome = cm->seq_params.monochrome; + seq_params->matrix_coefficients; + pool->frame_bufs[cm->new_fb_idx].buf.monochrome = seq_params->monochrome; pool->frame_bufs[cm->new_fb_idx].buf.chroma_sample_position = - cm->chroma_sample_position; - pool->frame_bufs[cm->new_fb_idx].buf.color_range = cm->color_range; + seq_params->chroma_sample_position; + pool->frame_bufs[cm->new_fb_idx].buf.color_range = seq_params->color_range; pool->frame_bufs[cm->new_fb_idx].buf.render_width = cm->render_width; pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height; } @@ -2500,8 +2584,15 @@ static void get_tile_buffers(AV1Decoder *pbi, const uint8_t *data, } } -static void set_cb_buffer(MACROBLOCKD *const xd, CB_BUFFER *cb_buffer, - const int num_planes) { +static void set_cb_buffer(AV1Decoder *pbi, MACROBLOCKD *const xd, + CB_BUFFER *cb_buffer_base, const int num_planes, + int mi_row, int mi_col) { + AV1_COMMON *const cm = &pbi->common; + int mib_size_log2 = cm->seq_params.mib_size_log2; + int stride = (cm->mi_cols >> mib_size_log2) + 1; + int offset = (mi_row >> mib_size_log2) * stride + (mi_col >> mib_size_log2); + CB_BUFFER *cb_buffer = cb_buffer_base + offset; + for (int plane = 0; plane < num_planes; ++plane) { xd->plane[plane].dqcoeff_block = cb_buffer->dqcoeff[plane]; xd->plane[plane].eob_data = cb_buffer->eob_data[plane]; @@ -2514,18 +2605,189 @@ static void set_cb_buffer(MACROBLOCKD *const xd, CB_BUFFER *cb_buffer, xd->color_index_map_offset[1] = 0; } +static void decoder_alloc_tile_data(AV1Decoder *pbi, const int n_tiles) { + AV1_COMMON *const cm = &pbi->common; + aom_free(pbi->tile_data); + CHECK_MEM_ERROR(cm, pbi->tile_data, + aom_memalign(32, n_tiles * sizeof(*pbi->tile_data))); + pbi->allocated_tiles = n_tiles; + for (int i = 0; i < n_tiles; i++) { + TileDataDec *const tile_data = pbi->tile_data + i; + av1_zero(tile_data->dec_row_mt_sync); + } + pbi->allocated_row_mt_sync_rows = 0; +} + +// Set up nsync by width. +static INLINE int get_sync_range(int width) { +// nsync numbers are picked by testing. +#if 0 + if (width < 640) + return 1; + else if (width <= 1280) + return 2; + else if (width <= 4096) + return 4; + else + return 8; +#else + (void)width; +#endif + return 1; +} + +// Allocate memory for decoder row synchronization +static void dec_row_mt_alloc(AV1DecRowMTSync *dec_row_mt_sync, AV1_COMMON *cm, + int rows) { + dec_row_mt_sync->allocated_sb_rows = rows; +#if CONFIG_MULTITHREAD + { + int i; + + CHECK_MEM_ERROR(cm, dec_row_mt_sync->mutex_, + aom_malloc(sizeof(*(dec_row_mt_sync->mutex_)) * rows)); + if (dec_row_mt_sync->mutex_) { + for (i = 0; i < rows; ++i) { + pthread_mutex_init(&dec_row_mt_sync->mutex_[i], NULL); + } + } + + CHECK_MEM_ERROR(cm, dec_row_mt_sync->cond_, + aom_malloc(sizeof(*(dec_row_mt_sync->cond_)) * rows)); + if (dec_row_mt_sync->cond_) { + for (i = 0; i < rows; ++i) { + pthread_cond_init(&dec_row_mt_sync->cond_[i], NULL); + } + } + } +#endif // CONFIG_MULTITHREAD + + CHECK_MEM_ERROR(cm, dec_row_mt_sync->cur_sb_col, + aom_malloc(sizeof(*(dec_row_mt_sync->cur_sb_col)) * rows)); + + // Set up nsync. + dec_row_mt_sync->sync_range = get_sync_range(cm->width); +} + +// Deallocate decoder row synchronization related mutex and data +void av1_dec_row_mt_dealloc(AV1DecRowMTSync *dec_row_mt_sync) { + if (dec_row_mt_sync != NULL) { +#if CONFIG_MULTITHREAD + int i; + if (dec_row_mt_sync->mutex_ != NULL) { + for (i = 0; i < dec_row_mt_sync->allocated_sb_rows; ++i) { + pthread_mutex_destroy(&dec_row_mt_sync->mutex_[i]); + } + aom_free(dec_row_mt_sync->mutex_); + } + if (dec_row_mt_sync->cond_ != NULL) { + for (i = 0; i < dec_row_mt_sync->allocated_sb_rows; ++i) { + pthread_cond_destroy(&dec_row_mt_sync->cond_[i]); + } + aom_free(dec_row_mt_sync->cond_); + } +#endif // CONFIG_MULTITHREAD + aom_free(dec_row_mt_sync->cur_sb_col); + + // clear the structure as the source of this call may be a resize in which + // case this call will be followed by an _alloc() which may fail. + av1_zero(*dec_row_mt_sync); + } +} + +static INLINE void sync_read(AV1DecRowMTSync *const dec_row_mt_sync, int r, + int c) { +#if CONFIG_MULTITHREAD + const int nsync = dec_row_mt_sync->sync_range; + + if (r && !(c & (nsync - 1))) { + pthread_mutex_t *const mutex = &dec_row_mt_sync->mutex_[r - 1]; + pthread_mutex_lock(mutex); + + while (c > dec_row_mt_sync->cur_sb_col[r - 1] - nsync) { + pthread_cond_wait(&dec_row_mt_sync->cond_[r - 1], mutex); + } + pthread_mutex_unlock(mutex); + } +#else + (void)dec_row_mt_sync; + (void)r; + (void)c; +#endif // CONFIG_MULTITHREAD +} + +static INLINE void sync_write(AV1DecRowMTSync *const dec_row_mt_sync, int r, + int c, const int sb_cols) { +#if CONFIG_MULTITHREAD + const int nsync = dec_row_mt_sync->sync_range; + int cur; + int sig = 1; + + if (c < sb_cols - 1) { + cur = c; + if (c % nsync) sig = 0; + } else { + cur = sb_cols + nsync; + } + + if (sig) { + pthread_mutex_lock(&dec_row_mt_sync->mutex_[r]); + + dec_row_mt_sync->cur_sb_col[r] = cur; + + pthread_cond_signal(&dec_row_mt_sync->cond_[r]); + pthread_mutex_unlock(&dec_row_mt_sync->mutex_[r]); + } +#else + (void)dec_row_mt_sync; + (void)r; + (void)c; + (void)sb_cols; +#endif // CONFIG_MULTITHREAD +} + +static INLINE int get_sb_rows_in_tile(AV1Decoder *pbi, TileInfo tile) { + AV1_COMMON *cm = &pbi->common; + int mi_rows_aligned_to_sb = ALIGN_POWER_OF_TWO( + tile.mi_row_end - tile.mi_row_start, cm->seq_params.mib_size_log2); + int sb_rows = mi_rows_aligned_to_sb >> cm->seq_params.mib_size_log2; + + return sb_rows; +} + +static INLINE int get_sb_cols_in_tile(AV1Decoder *pbi, TileInfo tile) { + AV1_COMMON *cm = &pbi->common; + int mi_cols_aligned_to_sb = ALIGN_POWER_OF_TWO( + tile.mi_col_end - tile.mi_col_start, cm->seq_params.mib_size_log2); + int sb_cols = mi_cols_aligned_to_sb >> cm->seq_params.mib_size_log2; + + return sb_cols; +} + static void decode_tile_sb_row(AV1Decoder *pbi, ThreadData *const td, TileInfo tile_info, const int mi_row) { AV1_COMMON *const cm = &pbi->common; const int num_planes = av1_num_planes(cm); - av1_zero_left_context(&td->xd); + TileDataDec *const tile_data = + pbi->tile_data + tile_info.tile_row * cm->tile_cols + tile_info.tile_col; + const int sb_cols_in_tile = get_sb_cols_in_tile(pbi, tile_info); + const int sb_row_in_tile = + (mi_row - tile_info.mi_row_start) >> cm->seq_params.mib_size_log2; + int sb_col_in_tile = 0; for (int mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end; - mi_col += cm->seq_params.mib_size) { - set_cb_buffer(&td->xd, &td->cb_buffer_base, num_planes); + mi_col += cm->seq_params.mib_size, sb_col_in_tile++) { + set_cb_buffer(pbi, &td->xd, pbi->cb_buffer_base, num_planes, mi_row, + mi_col); + + sync_read(&tile_data->dec_row_mt_sync, sb_row_in_tile, sb_col_in_tile); - decode_partition(pbi, &td->xd, mi_row, mi_col, td->bit_reader, - cm->seq_params.sb_size); + // Decoding of the super-block + decode_partition(pbi, td, mi_row, mi_col, td->bit_reader, + cm->seq_params.sb_size, 0x2); + + sync_write(&tile_data->dec_row_mt_sync, sb_row_in_tile, sb_col_in_tile, + sb_cols_in_tile); } } @@ -2555,6 +2817,27 @@ static int check_trailing_bits_after_symbol_coder(aom_reader *r) { return 0; } +static void set_decode_func_pointers(ThreadData *td, int parse_decode_flag) { + td->read_coeffs_tx_intra_block_visit = decode_block_void; + td->predict_and_recon_intra_block_visit = decode_block_void; + td->read_coeffs_tx_inter_block_visit = decode_block_void; + td->inverse_tx_inter_block_visit = decode_block_void; + td->predict_inter_block_visit = predict_inter_block_void; + td->cfl_store_inter_block_visit = cfl_store_inter_block_void; + + if (parse_decode_flag & 0x1) { + td->read_coeffs_tx_intra_block_visit = read_coeffs_tx_intra_block; + td->read_coeffs_tx_inter_block_visit = av1_read_coeffs_txb_facade; + } + if (parse_decode_flag & 0x2) { + td->predict_and_recon_intra_block_visit = + predict_and_reconstruct_intra_block; + td->inverse_tx_inter_block_visit = inverse_transform_inter_block; + td->predict_inter_block_visit = predict_inter_block; + td->cfl_store_inter_block_visit = cfl_store_inter_block; + } +} + static void decode_tile(AV1Decoder *pbi, ThreadData *const td, int tile_row, int tile_col) { TileInfo tile_info; @@ -2564,13 +2847,23 @@ static void decode_tile(AV1Decoder *pbi, ThreadData *const td, int tile_row, av1_tile_set_row(&tile_info, cm, tile_row); av1_tile_set_col(&tile_info, cm, tile_col); - av1_zero_above_context(cm, tile_info.mi_col_start, tile_info.mi_col_end, - tile_row); + av1_zero_above_context(cm, &td->xd, tile_info.mi_col_start, + tile_info.mi_col_end, tile_row); + av1_reset_loop_filter_delta(&td->xd, num_planes); av1_reset_loop_restoration(&td->xd, num_planes); for (int mi_row = tile_info.mi_row_start; mi_row < tile_info.mi_row_end; mi_row += cm->seq_params.mib_size) { - decode_tile_sb_row(pbi, td, tile_info, mi_row); + av1_zero_left_context(&td->xd); + + for (int mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end; + mi_col += cm->seq_params.mib_size) { + set_cb_buffer(pbi, &td->xd, &td->cb_buffer_base, num_planes, 0, 0); + + // Bit-stream parsing and decoding of the superblock + decode_partition(pbi, td, mi_row, mi_col, td->bit_reader, + cm->seq_params.sb_size, 0x3); + } } int corrupted = @@ -2582,6 +2875,7 @@ static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data, const uint8_t *data_end, int start_tile, int end_tile) { AV1_COMMON *const cm = &pbi->common; + ThreadData *const td = &pbi->td; const int tile_cols = cm->tile_cols; const int tile_rows = cm->tile_rows; const int n_tiles = tile_cols * tile_rows; @@ -2641,23 +2935,26 @@ static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data, get_tile_buffers(pbi, data, data_end, tile_buffers, start_tile, end_tile); if (pbi->tile_data == NULL || n_tiles != pbi->allocated_tiles) { - aom_free(pbi->tile_data); - CHECK_MEM_ERROR(cm, pbi->tile_data, - aom_memalign(32, n_tiles * (sizeof(*pbi->tile_data)))); - pbi->allocated_tiles = n_tiles; + decoder_alloc_tile_data(pbi, n_tiles); } #if CONFIG_ACCOUNTING if (pbi->acct_enabled) { aom_accounting_reset(&pbi->accounting); } #endif + + set_decode_func_pointers(&pbi->td, 0x3); + // Load all tile information into thread_data. + td->xd = pbi->mb; + td->xd.corrupted = 0; + td->xd.mc_buf[0] = td->mc_buf[0]; + td->xd.mc_buf[1] = td->mc_buf[1]; for (tile_row = tile_rows_start; tile_row < tile_rows_end; ++tile_row) { const int row = inv_row_order ? tile_rows - 1 - tile_row : tile_row; for (tile_col = tile_cols_start; tile_col < tile_cols_end; ++tile_col) { const int col = inv_col_order ? tile_cols - 1 - tile_col : tile_col; - ThreadData *const td = &pbi->td; TileDataDec *const tile_data = pbi->tile_data + row * cm->tile_cols + col; const TileBufferDec *const tile_bs_buf = &tile_buffers[row][col]; @@ -2665,13 +2962,10 @@ static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data, row * cm->tile_cols + col > end_tile) continue; - td->xd = pbi->mb; - td->xd.corrupted = 0; - td->xd.mc_buf[0] = pbi->td.mc_buf[0]; - td->xd.mc_buf[1] = pbi->td.mc_buf[1]; td->bit_reader = &tile_data->bit_reader; av1_zero(td->dqcoeff); av1_tile_init(&td->xd.tile, cm, row, col); + td->xd.current_qindex = cm->base_qindex; setup_bool_decoder(tile_bs_buf->data, data_end, tile_bs_buf->size, &cm->error, td->bit_reader, allow_update_cdf); #if CONFIG_ACCOUNTING @@ -2691,7 +2985,7 @@ static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data, td->xd.tile_ctx = &tile_data->tctx; // decode tile - decode_tile(pbi, &pbi->td, row, col); + decode_tile(pbi, td, row, col); aom_merge_corrupted_flag(&pbi->mb.corrupted, td->xd.corrupted); if (pbi->mb.corrupted) aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME, @@ -2729,6 +3023,47 @@ static TileJobsDec *get_dec_job_info(AV1DecTileMT *tile_mt_info) { return cur_job_info; } +static void tile_worker_hook_init(AV1Decoder *const pbi, + DecWorkerData *const thread_data, + const TileBufferDec *const tile_buffer, + TileDataDec *const tile_data, + uint8_t allow_update_cdf) { + AV1_COMMON *cm = &pbi->common; + ThreadData *const td = thread_data->td; + int tile_row = tile_data->tile_info.tile_row; + int tile_col = tile_data->tile_info.tile_col; + + td->bit_reader = &tile_data->bit_reader; + av1_zero(td->dqcoeff); + av1_tile_init(&td->xd.tile, cm, tile_row, tile_col); + td->xd.current_qindex = cm->base_qindex; + setup_bool_decoder(tile_buffer->data, thread_data->data_end, + tile_buffer->size, &thread_data->error_info, + td->bit_reader, allow_update_cdf); +#if CONFIG_ACCOUNTING + if (pbi->acct_enabled) { + td->bit_reader->accounting = &pbi->accounting; + td->bit_reader->accounting->last_tell_frac = + aom_reader_tell_frac(td->bit_reader); + } else { + td->bit_reader->accounting = NULL; + } +#endif + av1_init_macroblockd(cm, &td->xd, td->dqcoeff); + td->xd.error_info = &thread_data->error_info; + av1_init_above_context(cm, &td->xd, tile_row); + + // Initialise the tile context from the frame context + tile_data->tctx = *cm->fc; + td->xd.tile_ctx = &tile_data->tctx; +#if CONFIG_ACCOUNTING + if (pbi->acct_enabled) { + tile_data->bit_reader.accounting->last_tell_frac = + aom_reader_tell_frac(&tile_data->bit_reader); + } +#endif +} + static int tile_worker_hook(void *arg1, void *arg2) { DecWorkerData *const thread_data = (DecWorkerData *)arg1; AV1Decoder *const pbi = (AV1Decoder *)arg2; @@ -2736,14 +3071,21 @@ static int tile_worker_hook(void *arg1, void *arg2) { ThreadData *const td = thread_data->td; uint8_t allow_update_cdf; + // The jmp_buf is valid only for the duration of the function that calls + // setjmp(). Therefore, this function must reset the 'setjmp' field to 0 + // before it returns. if (setjmp(thread_data->error_info.jmp)) { thread_data->error_info.setjmp = 0; thread_data->td->xd.corrupted = 1; return 0; } + thread_data->error_info.setjmp = 1; + allow_update_cdf = cm->large_scale_tile ? 0 : 1; allow_update_cdf = allow_update_cdf && !cm->disable_cdf_update; + set_decode_func_pointers(td, 0x3); + assert(cm->tile_cols > 0); while (1) { TileJobsDec *cur_job_info = get_dec_job_info(&pbi->tile_mt_info); @@ -2751,46 +3093,248 @@ static int tile_worker_hook(void *arg1, void *arg2) { if (cur_job_info != NULL && !td->xd.corrupted) { const TileBufferDec *const tile_buffer = cur_job_info->tile_buffer; TileDataDec *const tile_data = cur_job_info->tile_data; - volatile int tile_row = tile_data->tile_info.tile_row; - volatile int tile_col = tile_data->tile_info.tile_col; + tile_worker_hook_init(pbi, thread_data, tile_buffer, tile_data, + allow_update_cdf); + // decode tile + int tile_row = tile_data->tile_info.tile_row; + int tile_col = tile_data->tile_info.tile_col; + decode_tile(pbi, td, tile_row, tile_col); + } else { + break; + } + } + thread_data->error_info.setjmp = 0; + return !td->xd.corrupted; +} - td->xd = pbi->mb; - td->xd.corrupted = 0; - td->xd.mc_buf[0] = td->mc_buf[0]; - td->xd.mc_buf[1] = td->mc_buf[1]; - td->bit_reader = &tile_data->bit_reader; - av1_zero(td->dqcoeff); - av1_tile_init(&td->xd.tile, cm, tile_row, tile_col); - setup_bool_decoder(tile_buffer->data, thread_data->data_end, - tile_buffer->size, &cm->error, td->bit_reader, - allow_update_cdf); -#if CONFIG_ACCOUNTING - if (pbi->acct_enabled) { - td->bit_reader->accounting = &pbi->accounting; - td->bit_reader->accounting->last_tell_frac = - aom_reader_tell_frac(td->bit_reader); - } else { - td->bit_reader->accounting = NULL; +static int get_next_job_info(AV1Decoder *const pbi, + AV1DecRowMTJobInfo *next_job_info, + int *end_of_frame) { + AV1_COMMON *cm = &pbi->common; + TileDataDec *tile_data; + AV1DecRowMTSync *dec_row_mt_sync; + AV1DecRowMTInfo *frame_row_mt_info = &pbi->frame_row_mt_info; + TileInfo tile_info; + const int tile_rows_start = frame_row_mt_info->tile_rows_start; + const int tile_rows_end = frame_row_mt_info->tile_rows_end; + const int tile_cols_start = frame_row_mt_info->tile_cols_start; + const int tile_cols_end = frame_row_mt_info->tile_cols_end; + const int start_tile = frame_row_mt_info->start_tile; + const int end_tile = frame_row_mt_info->end_tile; + const int sb_mi_size = mi_size_wide[cm->seq_params.sb_size]; + int num_mis_to_decode, num_threads_working; + int num_mis_waiting_for_decode; + int min_threads_working = INT_MAX; + int max_mis_to_decode = 0; + int tile_row_idx, tile_col_idx; + int tile_row = 0; + int tile_col = 0; + + memset(next_job_info, 0, sizeof(*next_job_info)); + + // Frame decode is completed or error is encountered. + *end_of_frame = (frame_row_mt_info->mi_rows_decode_started == + frame_row_mt_info->mi_rows_to_decode) || + (frame_row_mt_info->row_mt_exit == 1); + if (*end_of_frame) { + return 1; + } + + // Decoding cannot start as bit-stream parsing is not complete. + if (frame_row_mt_info->mi_rows_parse_done - + frame_row_mt_info->mi_rows_decode_started == + 0) + return 0; + + // Choose the tile to decode. + for (tile_row_idx = tile_rows_start; tile_row_idx < tile_rows_end; + ++tile_row_idx) { + for (tile_col_idx = tile_cols_start; tile_col_idx < tile_cols_end; + ++tile_col_idx) { + if (tile_row_idx * cm->tile_cols + tile_col_idx < start_tile || + tile_row_idx * cm->tile_cols + tile_col_idx > end_tile) + continue; + + tile_data = pbi->tile_data + tile_row_idx * cm->tile_cols + tile_col_idx; + dec_row_mt_sync = &tile_data->dec_row_mt_sync; + + num_threads_working = dec_row_mt_sync->num_threads_working; + num_mis_waiting_for_decode = (dec_row_mt_sync->mi_rows_parse_done - + dec_row_mt_sync->mi_rows_decode_started) * + dec_row_mt_sync->mi_cols; + num_mis_to_decode = + (dec_row_mt_sync->mi_rows - dec_row_mt_sync->mi_rows_decode_started) * + dec_row_mt_sync->mi_cols; + + assert(num_mis_to_decode >= num_mis_waiting_for_decode); + + // Pick the tile which has minimum number of threads working on it. + if (num_mis_waiting_for_decode > 0) { + if (num_threads_working < min_threads_working) { + min_threads_working = num_threads_working; + max_mis_to_decode = 0; + } + if (num_threads_working == min_threads_working && + num_mis_to_decode > max_mis_to_decode) { + max_mis_to_decode = num_mis_to_decode; + tile_row = tile_row_idx; + tile_col = tile_col_idx; + } } + } + } + + tile_data = pbi->tile_data + tile_row * cm->tile_cols + tile_col; + tile_info = tile_data->tile_info; + dec_row_mt_sync = &tile_data->dec_row_mt_sync; + + next_job_info->tile_row = tile_row; + next_job_info->tile_col = tile_col; + next_job_info->mi_row = + dec_row_mt_sync->mi_rows_decode_started + tile_info.mi_row_start; + + dec_row_mt_sync->num_threads_working++; + dec_row_mt_sync->mi_rows_decode_started += sb_mi_size; + frame_row_mt_info->mi_rows_decode_started += sb_mi_size; + + return 1; +} + +static INLINE void signal_parse_sb_row_done(AV1Decoder *const pbi, + TileDataDec *const tile_data, + const int sb_mi_size) { + AV1DecRowMTInfo *frame_row_mt_info = &pbi->frame_row_mt_info; +#if CONFIG_MULTITHREAD + pthread_mutex_lock(pbi->row_mt_mutex_); #endif - av1_init_macroblockd(cm, &td->xd, td->dqcoeff); - av1_init_above_context(cm, &td->xd, tile_row); + tile_data->dec_row_mt_sync.mi_rows_parse_done += sb_mi_size; + frame_row_mt_info->mi_rows_parse_done += sb_mi_size; +#if CONFIG_MULTITHREAD + pthread_cond_broadcast(pbi->row_mt_cond_); + pthread_mutex_unlock(pbi->row_mt_mutex_); +#endif +} - // Initialise the tile context from the frame context - tile_data->tctx = *cm->fc; - td->xd.tile_ctx = &tile_data->tctx; -#if CONFIG_ACCOUNTING - if (pbi->acct_enabled) { - tile_data->bit_reader.accounting->last_tell_frac = - aom_reader_tell_frac(&tile_data->bit_reader); - } +static int row_mt_worker_hook(void *arg1, void *arg2) { + DecWorkerData *const thread_data = (DecWorkerData *)arg1; + AV1Decoder *const pbi = (AV1Decoder *)arg2; + AV1_COMMON *cm = &pbi->common; + ThreadData *const td = thread_data->td; + uint8_t allow_update_cdf; + const int sb_mi_size = mi_size_wide[cm->seq_params.sb_size]; + AV1DecRowMTInfo *frame_row_mt_info = &pbi->frame_row_mt_info; + td->xd.corrupted = 0; + + // The jmp_buf is valid only for the duration of the function that calls + // setjmp(). Therefore, this function must reset the 'setjmp' field to 0 + // before it returns. + if (setjmp(thread_data->error_info.jmp)) { + thread_data->error_info.setjmp = 0; + thread_data->td->xd.corrupted = 1; +#if CONFIG_MULTITHREAD + pthread_mutex_lock(pbi->row_mt_mutex_); #endif + frame_row_mt_info->row_mt_exit = 1; +#if CONFIG_MULTITHREAD + pthread_mutex_unlock(pbi->row_mt_mutex_); +#endif + return 0; + } + thread_data->error_info.setjmp = 1; + + const int num_planes = av1_num_planes(cm); + allow_update_cdf = cm->large_scale_tile ? 0 : 1; + allow_update_cdf = allow_update_cdf && !cm->disable_cdf_update; + + assert(cm->tile_cols > 0); + while (1) { + TileJobsDec *cur_job_info = get_dec_job_info(&pbi->tile_mt_info); + + if (cur_job_info != NULL && !td->xd.corrupted) { + const TileBufferDec *const tile_buffer = cur_job_info->tile_buffer; + TileDataDec *const tile_data = cur_job_info->tile_data; + tile_worker_hook_init(pbi, thread_data, tile_buffer, tile_data, + allow_update_cdf); + + set_decode_func_pointers(td, 0x1); + // decode tile - decode_tile(pbi, td, tile_row, tile_col); + TileInfo tile_info = tile_data->tile_info; + int tile_row = tile_info.tile_row; + + av1_zero_above_context(cm, &td->xd, tile_info.mi_col_start, + tile_info.mi_col_end, tile_row); + av1_reset_loop_filter_delta(&td->xd, num_planes); + av1_reset_loop_restoration(&td->xd, num_planes); + + for (int mi_row = tile_info.mi_row_start; mi_row < tile_info.mi_row_end; + mi_row += cm->seq_params.mib_size) { + av1_zero_left_context(&td->xd); + + for (int mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end; + mi_col += cm->seq_params.mib_size) { + set_cb_buffer(pbi, &td->xd, pbi->cb_buffer_base, num_planes, mi_row, + mi_col); + + // Bit-stream parsing of the superblock + decode_partition(pbi, td, mi_row, mi_col, td->bit_reader, + cm->seq_params.sb_size, 0x1); + } + signal_parse_sb_row_done(pbi, tile_data, sb_mi_size); + } + + int corrupted = + (check_trailing_bits_after_symbol_coder(td->bit_reader)) ? 1 : 0; + aom_merge_corrupted_flag(&td->xd.corrupted, corrupted); } else { break; } } + + set_decode_func_pointers(td, 0x2); + + while (1) { + AV1DecRowMTJobInfo next_job_info; + int end_of_frame = 0; + +#if CONFIG_MULTITHREAD + pthread_mutex_lock(pbi->row_mt_mutex_); +#endif + while (!get_next_job_info(pbi, &next_job_info, &end_of_frame)) { +#if CONFIG_MULTITHREAD + pthread_cond_wait(pbi->row_mt_cond_, pbi->row_mt_mutex_); +#endif + } +#if CONFIG_MULTITHREAD + pthread_mutex_unlock(pbi->row_mt_mutex_); +#endif + + if (end_of_frame) break; + + int tile_row = next_job_info.tile_row; + int tile_col = next_job_info.tile_col; + int mi_row = next_job_info.mi_row; + + TileDataDec *tile_data = + pbi->tile_data + tile_row * cm->tile_cols + tile_col; + AV1DecRowMTSync *dec_row_mt_sync = &tile_data->dec_row_mt_sync; + TileInfo tile_info = tile_data->tile_info; + + av1_tile_init(&td->xd.tile, cm, tile_row, tile_col); + av1_init_macroblockd(cm, &td->xd, td->dqcoeff); + td->xd.error_info = &thread_data->error_info; + + decode_tile_sb_row(pbi, td, tile_info, mi_row); + +#if CONFIG_MULTITHREAD + pthread_mutex_lock(pbi->row_mt_mutex_); +#endif + dec_row_mt_sync->num_threads_working--; +#if CONFIG_MULTITHREAD + pthread_mutex_unlock(pbi->row_mt_mutex_); +#endif + } + thread_data->error_info.setjmp = 0; return !td->xd.corrupted; } @@ -2842,8 +3386,7 @@ static void alloc_dec_jobs(AV1DecTileMT *tile_mt_info, AV1_COMMON *cm, aom_malloc(sizeof(*tile_mt_info->job_queue) * num_tiles)); } -void av1_free_mc_tmp_buf(void *td, int use_highbd) { - ThreadData *thread_data = (ThreadData *)td; +void av1_free_mc_tmp_buf(ThreadData *thread_data, int use_highbd) { int ref; for (ref = 0; ref < 2; ref++) { if (use_highbd) @@ -2855,10 +3398,8 @@ void av1_free_mc_tmp_buf(void *td, int use_highbd) { thread_data->mc_buf_size = 0; } -static void allocate_mc_tmp_buf(AV1_COMMON *const cm, void *td, int buf_size, - int use_highbd) { - ThreadData *thread_data = (ThreadData *)td; - +static void allocate_mc_tmp_buf(AV1_COMMON *const cm, ThreadData *thread_data, + int buf_size, int use_highbd) { for (int ref = 0; ref < 2; ref++) { if (use_highbd) { uint16_t *hbd_mc_buf; @@ -2872,11 +3413,130 @@ static void allocate_mc_tmp_buf(AV1_COMMON *const cm, void *td, int buf_size, thread_data->mc_buf_size = buf_size; } +static void reset_dec_workers(AV1Decoder *pbi, AVxWorkerHook worker_hook, + int num_workers) { + const AVxWorkerInterface *const winterface = aom_get_worker_interface(); + + // Reset tile decoding hook + for (int worker_idx = 0; worker_idx < num_workers; ++worker_idx) { + AVxWorker *const worker = &pbi->tile_workers[worker_idx]; + DecWorkerData *const thread_data = pbi->thread_data + worker_idx; + thread_data->td->xd = pbi->mb; + thread_data->td->xd.corrupted = 0; + thread_data->td->xd.mc_buf[0] = thread_data->td->mc_buf[0]; + thread_data->td->xd.mc_buf[1] = thread_data->td->mc_buf[1]; + winterface->sync(worker); + + worker->hook = worker_hook; + worker->data1 = thread_data; + worker->data2 = pbi; + } +#if CONFIG_ACCOUNTING + if (pbi->acct_enabled) { + aom_accounting_reset(&pbi->accounting); + } +#endif +} + +static void launch_dec_workers(AV1Decoder *pbi, const uint8_t *data_end, + int num_workers) { + const AVxWorkerInterface *const winterface = aom_get_worker_interface(); + + for (int worker_idx = 0; worker_idx < num_workers; ++worker_idx) { + AVxWorker *const worker = &pbi->tile_workers[worker_idx]; + DecWorkerData *const thread_data = (DecWorkerData *)worker->data1; + + thread_data->data_end = data_end; + + worker->had_error = 0; + if (worker_idx == num_workers - 1) { + winterface->execute(worker); + } else { + winterface->launch(worker); + } + } +} + +static void sync_dec_workers(AV1Decoder *pbi, int num_workers) { + const AVxWorkerInterface *const winterface = aom_get_worker_interface(); + int corrupted = 0; + + for (int worker_idx = num_workers; worker_idx > 0; --worker_idx) { + AVxWorker *const worker = &pbi->tile_workers[worker_idx - 1]; + aom_merge_corrupted_flag(&corrupted, !winterface->sync(worker)); + } + + pbi->mb.corrupted = corrupted; +} + +static void decode_mt_init(AV1Decoder *pbi) { + AV1_COMMON *const cm = &pbi->common; + const AVxWorkerInterface *const winterface = aom_get_worker_interface(); + int worker_idx; + + // Create workers and thread_data + if (pbi->num_workers == 0) { + const int num_threads = pbi->max_threads; + CHECK_MEM_ERROR(cm, pbi->tile_workers, + aom_malloc(num_threads * sizeof(*pbi->tile_workers))); + CHECK_MEM_ERROR(cm, pbi->thread_data, + aom_malloc(num_threads * sizeof(*pbi->thread_data))); + + for (worker_idx = 0; worker_idx < num_threads; ++worker_idx) { + AVxWorker *const worker = &pbi->tile_workers[worker_idx]; + DecWorkerData *const thread_data = pbi->thread_data + worker_idx; + ++pbi->num_workers; + + winterface->init(worker); + if (worker_idx < num_threads - 1 && !winterface->reset(worker)) { + aom_internal_error(&cm->error, AOM_CODEC_ERROR, + "Tile decoder thread creation failed"); + } + + if (worker_idx < num_threads - 1) { + // Allocate thread data. + CHECK_MEM_ERROR(cm, thread_data->td, + aom_memalign(32, sizeof(*thread_data->td))); + av1_zero(*thread_data->td); + } else { + // Main thread acts as a worker and uses the thread data in pbi + thread_data->td = &pbi->td; + } + thread_data->error_info.error_code = AOM_CODEC_OK; + thread_data->error_info.setjmp = 0; + } + } + const int use_highbd = cm->seq_params.use_highbitdepth ? 1 : 0; + const int buf_size = MC_TEMP_BUF_PELS << use_highbd; + for (worker_idx = 0; worker_idx < pbi->max_threads - 1; ++worker_idx) { + DecWorkerData *const thread_data = pbi->thread_data + worker_idx; + if (thread_data->td->mc_buf_size != buf_size) { + av1_free_mc_tmp_buf(thread_data->td, use_highbd); + allocate_mc_tmp_buf(cm, thread_data->td, buf_size, use_highbd); + } + } +} + +static void tile_mt_queue(AV1Decoder *pbi, int tile_cols, int tile_rows, + int tile_rows_start, int tile_rows_end, + int tile_cols_start, int tile_cols_end, + int start_tile, int end_tile) { + AV1_COMMON *const cm = &pbi->common; + if (pbi->tile_mt_info.alloc_tile_cols != tile_cols || + pbi->tile_mt_info.alloc_tile_rows != tile_rows) { + av1_dealloc_dec_jobs(&pbi->tile_mt_info); + alloc_dec_jobs(&pbi->tile_mt_info, cm, tile_rows, tile_cols); + } + enqueue_tile_jobs(pbi, cm, tile_rows_start, tile_rows_end, tile_cols_start, + tile_cols_end, start_tile, end_tile); + qsort(pbi->tile_mt_info.job_queue, pbi->tile_mt_info.jobs_enqueued, + sizeof(pbi->tile_mt_info.job_queue[0]), compare_tile_buffers); +} + static const uint8_t *decode_tiles_mt(AV1Decoder *pbi, const uint8_t *data, const uint8_t *data_end, int start_tile, int end_tile) { AV1_COMMON *const cm = &pbi->common; - const AVxWorkerInterface *const winterface = aom_get_worker_interface(); const int tile_cols = cm->tile_cols; const int tile_rows = cm->tile_rows; const int n_tiles = tile_cols * tile_rows; @@ -2891,7 +3551,6 @@ static const uint8_t *decode_tiles_mt(AV1Decoder *pbi, const uint8_t *data, int tile_cols_end; int tile_count_tg; int num_workers; - int worker_idx; const uint8_t *raw_data_end = NULL; if (cm->large_scale_tile) { @@ -2923,48 +3582,188 @@ static const uint8_t *decode_tiles_mt(AV1Decoder *pbi, const uint8_t *data, assert(start_tile <= end_tile); assert(start_tile >= 0 && end_tile < n_tiles); - // Create workers and thread_data - if (pbi->num_workers == 0) { - const int num_threads = pbi->max_threads; - CHECK_MEM_ERROR(cm, pbi->tile_workers, - aom_malloc(num_threads * sizeof(*pbi->tile_workers))); - CHECK_MEM_ERROR(cm, pbi->thread_data, - aom_malloc(num_threads * sizeof(*pbi->thread_data))); + decode_mt_init(pbi); - for (worker_idx = 0; worker_idx < num_threads; ++worker_idx) { - AVxWorker *const worker = &pbi->tile_workers[worker_idx]; - DecWorkerData *const thread_data = pbi->thread_data + worker_idx; - ++pbi->num_workers; + // get tile size in tile group +#if EXT_TILE_DEBUG + if (cm->large_scale_tile) assert(pbi->ext_tile_debug == 1); + if (cm->large_scale_tile) + raw_data_end = get_ls_tile_buffers(pbi, data, data_end, tile_buffers); + else +#endif // EXT_TILE_DEBUG + get_tile_buffers(pbi, data, data_end, tile_buffers, start_tile, end_tile); - winterface->init(worker); - if (worker_idx < num_threads - 1 && !winterface->reset(worker)) { - aom_internal_error(&cm->error, AOM_CODEC_ERROR, - "Tile decoder thread creation failed"); - } + if (pbi->tile_data == NULL || n_tiles != pbi->allocated_tiles) { + decoder_alloc_tile_data(pbi, n_tiles); + } - if (worker_idx < num_threads - 1) { - // Allocate thread data. - CHECK_MEM_ERROR(cm, thread_data->td, - aom_memalign(32, sizeof(*thread_data->td))); - av1_zero(*thread_data->td); - } else { - // Main thread acts as a worker and uses the thread data in pbi - thread_data->td = &pbi->td; - } + for (int row = 0; row < tile_rows; row++) { + for (int col = 0; col < tile_cols; col++) { + TileDataDec *tile_data = pbi->tile_data + row * cm->tile_cols + col; + av1_tile_init(&tile_data->tile_info, cm, row, col); } } - const int use_highbd = cm->use_highbitdepth ? 1 : 0; - const int buf_size = MC_TEMP_BUF_PELS << use_highbd; - for (worker_idx = 0; worker_idx < pbi->max_threads - 1; ++worker_idx) { - DecWorkerData *const thread_data = pbi->thread_data + worker_idx; - if (thread_data->td->mc_buf_size != buf_size) { - av1_free_mc_tmp_buf(thread_data->td, use_highbd); - allocate_mc_tmp_buf(cm, thread_data->td, buf_size, use_highbd); + + tile_mt_queue(pbi, tile_cols, tile_rows, tile_rows_start, tile_rows_end, + tile_cols_start, tile_cols_end, start_tile, end_tile); + + reset_dec_workers(pbi, tile_worker_hook, num_workers); + launch_dec_workers(pbi, data_end, num_workers); + sync_dec_workers(pbi, num_workers); + + if (pbi->mb.corrupted) + aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME, + "Failed to decode tile data"); + + if (cm->large_scale_tile) { + if (n_tiles == 1) { + // Find the end of the single tile buffer + return aom_reader_find_end(&pbi->tile_data->bit_reader); } + // Return the end of the last tile buffer + return raw_data_end; + } + TileDataDec *const tile_data = pbi->tile_data + end_tile; + + return aom_reader_find_end(&tile_data->bit_reader); +} + +static void dec_alloc_cb_buf(AV1Decoder *pbi) { + AV1_COMMON *const cm = &pbi->common; + int size = ((cm->mi_rows >> cm->seq_params.mib_size_log2) + 1) * + ((cm->mi_cols >> cm->seq_params.mib_size_log2) + 1); + + if (pbi->cb_buffer_alloc_size < size) { + av1_dec_free_cb_buf(pbi); + CHECK_MEM_ERROR(cm, pbi->cb_buffer_base, + aom_memalign(32, sizeof(*pbi->cb_buffer_base) * size)); + pbi->cb_buffer_alloc_size = size; + } +} + +static void row_mt_frame_init(AV1Decoder *pbi, int tile_rows_start, + int tile_rows_end, int tile_cols_start, + int tile_cols_end, int start_tile, int end_tile, + int max_sb_rows) { + AV1_COMMON *const cm = &pbi->common; + AV1DecRowMTInfo *frame_row_mt_info = &pbi->frame_row_mt_info; + + frame_row_mt_info->tile_rows_start = tile_rows_start; + frame_row_mt_info->tile_rows_end = tile_rows_end; + frame_row_mt_info->tile_cols_start = tile_cols_start; + frame_row_mt_info->tile_cols_end = tile_cols_end; + frame_row_mt_info->start_tile = start_tile; + frame_row_mt_info->end_tile = end_tile; + frame_row_mt_info->mi_rows_to_decode = 0; + frame_row_mt_info->mi_rows_parse_done = 0; + frame_row_mt_info->mi_rows_decode_started = 0; + frame_row_mt_info->row_mt_exit = 0; + + for (int tile_row = tile_rows_start; tile_row < tile_rows_end; ++tile_row) { + for (int tile_col = tile_cols_start; tile_col < tile_cols_end; ++tile_col) { + if (tile_row * cm->tile_cols + tile_col < start_tile || + tile_row * cm->tile_cols + tile_col > end_tile) + continue; + + TileDataDec *const tile_data = + pbi->tile_data + tile_row * cm->tile_cols + tile_col; + TileInfo tile_info = tile_data->tile_info; + + tile_data->dec_row_mt_sync.mi_rows_parse_done = 0; + tile_data->dec_row_mt_sync.mi_rows_decode_started = 0; + tile_data->dec_row_mt_sync.num_threads_working = 0; + tile_data->dec_row_mt_sync.mi_rows = + ALIGN_POWER_OF_TWO(tile_info.mi_row_end - tile_info.mi_row_start, + cm->seq_params.mib_size_log2); + tile_data->dec_row_mt_sync.mi_cols = + ALIGN_POWER_OF_TWO(tile_info.mi_col_end - tile_info.mi_col_start, + cm->seq_params.mib_size_log2); + + frame_row_mt_info->mi_rows_to_decode += + tile_data->dec_row_mt_sync.mi_rows; + + // Initialize cur_sb_col to -1 for all SB rows. + memset(tile_data->dec_row_mt_sync.cur_sb_col, -1, + sizeof(*tile_data->dec_row_mt_sync.cur_sb_col) * max_sb_rows); + } + } + +#if CONFIG_MULTITHREAD + if (pbi->row_mt_mutex_ == NULL) { + CHECK_MEM_ERROR(cm, pbi->row_mt_mutex_, + aom_malloc(sizeof(*(pbi->row_mt_mutex_)))); + if (pbi->row_mt_mutex_) { + pthread_mutex_init(pbi->row_mt_mutex_, NULL); + } + } + + if (pbi->row_mt_cond_ == NULL) { + CHECK_MEM_ERROR(cm, pbi->row_mt_cond_, + aom_malloc(sizeof(*(pbi->row_mt_cond_)))); + if (pbi->row_mt_cond_) { + pthread_cond_init(pbi->row_mt_cond_, NULL); + } + } +#endif +} + +static const uint8_t *decode_tiles_row_mt(AV1Decoder *pbi, const uint8_t *data, + const uint8_t *data_end, + int start_tile, int end_tile) { + AV1_COMMON *const cm = &pbi->common; + const int tile_cols = cm->tile_cols; + const int tile_rows = cm->tile_rows; + const int n_tiles = tile_cols * tile_rows; + TileBufferDec(*const tile_buffers)[MAX_TILE_COLS] = pbi->tile_buffers; + const int dec_tile_row = AOMMIN(pbi->dec_tile_row, tile_rows); + const int single_row = pbi->dec_tile_row >= 0; + const int dec_tile_col = AOMMIN(pbi->dec_tile_col, tile_cols); + const int single_col = pbi->dec_tile_col >= 0; + int tile_rows_start; + int tile_rows_end; + int tile_cols_start; + int tile_cols_end; + int tile_count_tg; + int num_workers; + const uint8_t *raw_data_end = NULL; + int max_sb_rows = 0; + + if (cm->large_scale_tile) { + tile_rows_start = single_row ? dec_tile_row : 0; + tile_rows_end = single_row ? dec_tile_row + 1 : tile_rows; + tile_cols_start = single_col ? dec_tile_col : 0; + tile_cols_end = single_col ? tile_cols_start + 1 : tile_cols; + } else { + tile_rows_start = 0; + tile_rows_end = tile_rows; + tile_cols_start = 0; + tile_cols_end = tile_cols; } + tile_count_tg = end_tile - start_tile + 1; + num_workers = pbi->max_threads; - // get tile size in tile group + // No tiles to decode. + if (tile_rows_end <= tile_rows_start || tile_cols_end <= tile_cols_start || + // First tile is larger than end_tile. + tile_rows_start * tile_cols + tile_cols_start > end_tile || + // Last tile is smaller than start_tile. + (tile_rows_end - 1) * tile_cols + tile_cols_end - 1 < start_tile) + return data; + + assert(tile_rows <= MAX_TILE_ROWS); + assert(tile_cols <= MAX_TILE_COLS); + assert(tile_count_tg > 0); + assert(num_workers > 0); + assert(start_tile <= end_tile); + assert(start_tile >= 0 && end_tile < n_tiles); + + (void)tile_count_tg; + + decode_mt_init(pbi); + + // get tile size in tile group #if EXT_TILE_DEBUG + if (cm->large_scale_tile) assert(pbi->ext_tile_debug == 1); if (cm->large_scale_tile) raw_data_end = get_ls_tile_buffers(pbi, data, data_end, tile_buffers); else @@ -2972,74 +3771,43 @@ static const uint8_t *decode_tiles_mt(AV1Decoder *pbi, const uint8_t *data, get_tile_buffers(pbi, data, data_end, tile_buffers, start_tile, end_tile); if (pbi->tile_data == NULL || n_tiles != pbi->allocated_tiles) { - aom_free(pbi->tile_data); - CHECK_MEM_ERROR(cm, pbi->tile_data, - aom_memalign(32, n_tiles * sizeof(*pbi->tile_data))); - pbi->allocated_tiles = n_tiles; + for (int i = 0; i < pbi->allocated_tiles; i++) { + TileDataDec *const tile_data = pbi->tile_data + i; + av1_dec_row_mt_dealloc(&tile_data->dec_row_mt_sync); + } + decoder_alloc_tile_data(pbi, n_tiles); } - // Reset tile decoding hook - for (worker_idx = 0; worker_idx < num_workers; ++worker_idx) { - AVxWorker *const worker = &pbi->tile_workers[worker_idx]; - DecWorkerData *const thread_data = pbi->thread_data + worker_idx; - winterface->sync(worker); - - worker->hook = tile_worker_hook; - worker->data1 = thread_data; - worker->data2 = pbi; - } -#if CONFIG_ACCOUNTING - if (pbi->acct_enabled) { - aom_accounting_reset(&pbi->accounting); - } -#endif for (int row = 0; row < tile_rows; row++) { for (int col = 0; col < tile_cols; col++) { TileDataDec *tile_data = pbi->tile_data + row * cm->tile_cols + col; av1_tile_init(&tile_data->tile_info, cm, row, col); + + max_sb_rows = + AOMMAX(max_sb_rows, get_sb_rows_in_tile(pbi, tile_data->tile_info)); } } - if (pbi->tile_mt_info.alloc_tile_cols != tile_cols || - pbi->tile_mt_info.alloc_tile_rows != tile_rows) { - av1_dealloc_dec_jobs(&pbi->tile_mt_info); - alloc_dec_jobs(&pbi->tile_mt_info, cm, tile_rows, tile_cols); + if (pbi->allocated_row_mt_sync_rows != max_sb_rows) { + for (int i = 0; i < n_tiles; ++i) { + TileDataDec *const tile_data = pbi->tile_data + i; + av1_dec_row_mt_dealloc(&tile_data->dec_row_mt_sync); + dec_row_mt_alloc(&tile_data->dec_row_mt_sync, cm, max_sb_rows); + } + pbi->allocated_row_mt_sync_rows = max_sb_rows; } - enqueue_tile_jobs(pbi, cm, tile_rows_start, tile_rows_end, tile_cols_start, - tile_cols_end, start_tile, end_tile); - qsort(pbi->tile_mt_info.job_queue, pbi->tile_mt_info.jobs_enqueued, - sizeof(pbi->tile_mt_info.job_queue[0]), compare_tile_buffers); - { - const int base = tile_count_tg / num_workers; - const int remain = tile_count_tg % num_workers; - int tile_start = start_tile; - int corrupted = 0; - - for (worker_idx = 0; worker_idx < num_workers; ++worker_idx) { - // compute number of tiles assign to each worker - const int count = base + (remain + worker_idx) / num_workers; - AVxWorker *const worker = &pbi->tile_workers[worker_idx]; - DecWorkerData *const thread_data = (DecWorkerData *)worker->data1; - - thread_data->data_end = data_end; - tile_start += count; + tile_mt_queue(pbi, tile_cols, tile_rows, tile_rows_start, tile_rows_end, + tile_cols_start, tile_cols_end, start_tile, end_tile); - worker->had_error = 0; - if (worker_idx == num_workers - 1) { - winterface->execute(worker); - } else { - winterface->launch(worker); - } - } + dec_alloc_cb_buf(pbi); - for (; worker_idx > 0; --worker_idx) { - AVxWorker *const worker = &pbi->tile_workers[worker_idx - 1]; - aom_merge_corrupted_flag(&corrupted, !winterface->sync(worker)); - } + row_mt_frame_init(pbi, tile_rows_start, tile_rows_end, tile_cols_start, + tile_cols_end, start_tile, end_tile, max_sb_rows); - pbi->mb.corrupted = corrupted; - } + reset_dec_workers(pbi, row_mt_worker_hook, num_workers); + launch_dec_workers(pbi, data_end, num_workers); + sync_dec_workers(pbi, num_workers); if (pbi->mb.corrupted) aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME, @@ -3064,17 +3832,20 @@ static void error_handler(void *data) { } // Reads the high_bitdepth and twelve_bit fields in color_config() and sets -// cm->bit_depth based on the values of those fields and cm->profile. Reports -// errors by calling rb->error_handler() or aom_internal_error(). -static void av1_read_bitdepth(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) { +// seq_params->bit_depth based on the values of those fields and +// seq_params->profile. Reports errors by calling rb->error_handler() or +// aom_internal_error(). +static void read_bitdepth(struct aom_read_bit_buffer *rb, + SequenceHeader *seq_params, + struct aom_internal_error_info *error_info) { const int high_bitdepth = aom_rb_read_bit(rb); - if (cm->profile == PROFILE_2 && high_bitdepth) { + if (seq_params->profile == PROFILE_2 && high_bitdepth) { const int twelve_bit = aom_rb_read_bit(rb); - cm->bit_depth = twelve_bit ? AOM_BITS_12 : AOM_BITS_10; - } else if (cm->profile <= PROFILE_2) { - cm->bit_depth = high_bitdepth ? AOM_BITS_10 : AOM_BITS_8; + seq_params->bit_depth = twelve_bit ? AOM_BITS_12 : AOM_BITS_10; + } else if (seq_params->profile <= PROFILE_2) { + seq_params->bit_depth = high_bitdepth ? AOM_BITS_10 : AOM_BITS_8; } else { - aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM, + aom_internal_error(error_info, AOM_CODEC_UNSUP_BITSTREAM, "Unsupported profile/bit-depth combination"); } } @@ -3082,6 +3853,7 @@ static void av1_read_bitdepth(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) { void av1_read_film_grain_params(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) { aom_film_grain_t *pars = &cm->film_grain_params; + const SequenceHeader *const seq_params = &cm->seq_params; pars->apply_grain = aom_rb_read_bit(rb); if (!pars->apply_grain) { @@ -3095,6 +3867,8 @@ void av1_read_film_grain_params(AV1_COMMON *cm, else pars->update_parameters = 1; + pars->bit_depth = seq_params->bit_depth; + if (!pars->update_parameters) { // inherit parameters from a previous reference frame RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; @@ -3129,11 +3903,11 @@ void av1_read_film_grain_params(AV1_COMMON *cm, pars->scaling_points_y[i][1] = aom_rb_read_literal(rb, 8); } - if (!cm->seq_params.monochrome) + if (!seq_params->monochrome) pars->chroma_scaling_from_luma = aom_rb_read_bit(rb); - if (cm->seq_params.monochrome || pars->chroma_scaling_from_luma || - ((cm->subsampling_x == 1) && (cm->subsampling_y == 1) && + if (seq_params->monochrome || pars->chroma_scaling_from_luma || + ((seq_params->subsampling_x == 1) && (seq_params->subsampling_y == 1) && (pars->num_y_points == 0))) { pars->num_cb_points = 0; pars->num_cr_points = 0; @@ -3168,7 +3942,7 @@ void av1_read_film_grain_params(AV1_COMMON *cm, pars->scaling_points_cr[i][1] = aom_rb_read_literal(rb, 8); } - if ((cm->subsampling_x == 1) && (cm->subsampling_y == 1) && + if ((seq_params->subsampling_x == 1) && (seq_params->subsampling_y == 1) && (((pars->num_cb_points == 0) && (pars->num_cr_points != 0)) || ((pars->num_cb_points != 0) && (pars->num_cr_points == 0)))) aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM, @@ -3222,89 +3996,93 @@ void av1_read_film_grain_params(AV1_COMMON *cm, } static void read_film_grain(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) { - if (cm->film_grain_params_present && (cm->show_frame || cm->showable_frame)) { + if (cm->seq_params.film_grain_params_present && + (cm->show_frame || cm->showable_frame)) { av1_read_film_grain_params(cm, rb); } else { memset(&cm->film_grain_params, 0, sizeof(cm->film_grain_params)); } - cm->film_grain_params.bit_depth = cm->bit_depth; + cm->film_grain_params.bit_depth = cm->seq_params.bit_depth; memcpy(&cm->cur_frame->film_grain_params, &cm->film_grain_params, sizeof(aom_film_grain_t)); } -void av1_read_color_config(AV1_COMMON *cm, struct aom_read_bit_buffer *rb, - int allow_lowbitdepth) { - av1_read_bitdepth(cm, rb); +void av1_read_color_config(struct aom_read_bit_buffer *rb, + int allow_lowbitdepth, SequenceHeader *seq_params, + struct aom_internal_error_info *error_info) { + read_bitdepth(rb, seq_params, error_info); - cm->use_highbitdepth = cm->bit_depth > AOM_BITS_8 || !allow_lowbitdepth; + seq_params->use_highbitdepth = + seq_params->bit_depth > AOM_BITS_8 || !allow_lowbitdepth; // monochrome bit (not needed for PROFILE_1) - const int is_monochrome = cm->profile != PROFILE_1 ? aom_rb_read_bit(rb) : 0; - cm->seq_params.monochrome = is_monochrome; + const int is_monochrome = + seq_params->profile != PROFILE_1 ? aom_rb_read_bit(rb) : 0; + seq_params->monochrome = is_monochrome; int color_description_present_flag = aom_rb_read_bit(rb); if (color_description_present_flag) { - cm->color_primaries = aom_rb_read_literal(rb, 8); - cm->transfer_characteristics = aom_rb_read_literal(rb, 8); - cm->matrix_coefficients = aom_rb_read_literal(rb, 8); + seq_params->color_primaries = aom_rb_read_literal(rb, 8); + seq_params->transfer_characteristics = aom_rb_read_literal(rb, 8); + seq_params->matrix_coefficients = aom_rb_read_literal(rb, 8); } else { - cm->color_primaries = AOM_CICP_CP_UNSPECIFIED; - cm->transfer_characteristics = AOM_CICP_TC_UNSPECIFIED; - cm->matrix_coefficients = AOM_CICP_MC_UNSPECIFIED; + seq_params->color_primaries = AOM_CICP_CP_UNSPECIFIED; + seq_params->transfer_characteristics = AOM_CICP_TC_UNSPECIFIED; + seq_params->matrix_coefficients = AOM_CICP_MC_UNSPECIFIED; } if (is_monochrome) { // [16,235] (including xvycc) vs [0,255] range - cm->color_range = aom_rb_read_bit(rb); - cm->subsampling_y = cm->subsampling_x = 1; - cm->chroma_sample_position = AOM_CSP_UNKNOWN; - cm->separate_uv_delta_q = 0; + seq_params->color_range = aom_rb_read_bit(rb); + seq_params->subsampling_y = seq_params->subsampling_x = 1; + seq_params->chroma_sample_position = AOM_CSP_UNKNOWN; + seq_params->separate_uv_delta_q = 0; return; } - if (cm->color_primaries == AOM_CICP_CP_BT_709 && - cm->transfer_characteristics == AOM_CICP_TC_SRGB && - cm->matrix_coefficients == AOM_CICP_MC_IDENTITY) { // it would be better - // to remove this - // dependency too - cm->subsampling_y = cm->subsampling_x = 0; - cm->color_range = 1; // assume full color-range - if (!(cm->profile == PROFILE_1 || - (cm->profile == PROFILE_2 && cm->bit_depth == AOM_BITS_12))) { + if (seq_params->color_primaries == AOM_CICP_CP_BT_709 && + seq_params->transfer_characteristics == AOM_CICP_TC_SRGB && + seq_params->matrix_coefficients == AOM_CICP_MC_IDENTITY) { + // It would be good to remove this dependency. + seq_params->subsampling_y = seq_params->subsampling_x = 0; + seq_params->color_range = 1; // assume full color-range + if (!(seq_params->profile == PROFILE_1 || + (seq_params->profile == PROFILE_2 && + seq_params->bit_depth == AOM_BITS_12))) { aom_internal_error( - &cm->error, AOM_CODEC_UNSUP_BITSTREAM, + error_info, AOM_CODEC_UNSUP_BITSTREAM, "sRGB colorspace not compatible with specified profile"); } } else { // [16,235] (including xvycc) vs [0,255] range - cm->color_range = aom_rb_read_bit(rb); - if (cm->profile == PROFILE_0) { + seq_params->color_range = aom_rb_read_bit(rb); + if (seq_params->profile == PROFILE_0) { // 420 only - cm->subsampling_x = cm->subsampling_y = 1; - } else if (cm->profile == PROFILE_1) { + seq_params->subsampling_x = seq_params->subsampling_y = 1; + } else if (seq_params->profile == PROFILE_1) { // 444 only - cm->subsampling_x = cm->subsampling_y = 0; + seq_params->subsampling_x = seq_params->subsampling_y = 0; } else { - assert(cm->profile == PROFILE_2); - if (cm->bit_depth == AOM_BITS_12) { - cm->subsampling_x = aom_rb_read_bit(rb); - if (cm->subsampling_x) - cm->subsampling_y = aom_rb_read_bit(rb); // 422 or 420 + assert(seq_params->profile == PROFILE_2); + if (seq_params->bit_depth == AOM_BITS_12) { + seq_params->subsampling_x = aom_rb_read_bit(rb); + if (seq_params->subsampling_x) + seq_params->subsampling_y = aom_rb_read_bit(rb); // 422 or 420 else - cm->subsampling_y = 0; // 444 + seq_params->subsampling_y = 0; // 444 } else { // 422 - cm->subsampling_x = 1; - cm->subsampling_y = 0; + seq_params->subsampling_x = 1; + seq_params->subsampling_y = 0; } } - if (cm->matrix_coefficients == AOM_CICP_MC_IDENTITY && - (cm->subsampling_x || cm->subsampling_y)) { + if (seq_params->matrix_coefficients == AOM_CICP_MC_IDENTITY && + (seq_params->subsampling_x || seq_params->subsampling_y)) { aom_internal_error( - &cm->error, AOM_CODEC_UNSUP_BITSTREAM, + error_info, AOM_CODEC_UNSUP_BITSTREAM, "Identity CICP Matrix incompatible with non 4:4:4 color sampling"); } - if (cm->subsampling_x && cm->subsampling_y) { - cm->chroma_sample_position = aom_rb_read_literal(rb, 2); + if (seq_params->subsampling_x && seq_params->subsampling_y) { + seq_params->chroma_sample_position = aom_rb_read_literal(rb, 2); } } - cm->separate_uv_delta_q = aom_rb_read_bit(rb); + seq_params->separate_uv_delta_q = aom_rb_read_bit(rb); } void av1_read_timing_info_header(AV1_COMMON *cm, @@ -3338,8 +4116,8 @@ void av1_read_decoder_model_info(AV1_COMMON *cm, aom_rb_read_literal(rb, 5) + 1; cm->buffer_model.num_units_in_decoding_tick = aom_rb_read_unsigned_literal( rb, 32); // Number of units in a decoding tick - cm->buffer_model.buffer_removal_delay_length = aom_rb_read_literal(rb, 5) + 1; - cm->buffer_model.frame_presentation_delay_length = + cm->buffer_model.buffer_removal_time_length = aom_rb_read_literal(rb, 5) + 1; + cm->buffer_model.frame_presentation_time_length = aom_rb_read_literal(rb, 5) + 1; } @@ -3352,32 +4130,27 @@ void av1_read_op_parameters_info(AV1_COMMON *const cm, op_num + 1); } - cm->op_params[op_num].decoder_buffer_delay = aom_rb_read_literal( + cm->op_params[op_num].decoder_buffer_delay = aom_rb_read_unsigned_literal( rb, cm->buffer_model.encoder_decoder_buffer_delay_length); - cm->op_params[op_num].encoder_buffer_delay = aom_rb_read_literal( + cm->op_params[op_num].encoder_buffer_delay = aom_rb_read_unsigned_literal( rb, cm->buffer_model.encoder_decoder_buffer_delay_length); cm->op_params[op_num].low_delay_mode_flag = aom_rb_read_bit(rb); } -static void av1_read_tu_pts_info(AV1_COMMON *const cm, - struct aom_read_bit_buffer *rb) { - cm->tu_presentation_delay = - aom_rb_read_literal(rb, cm->buffer_model.frame_presentation_delay_length); -} - -void read_sequence_header(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) { - // rb->error_handler may be triggered during aom_rb_read_bit(), raising - // internal errors and immediate decoding termination. We use a local variable - // to store the info. as we decode. At the end, if no errors have occurred, - // cm->seq_params is updated. - SequenceHeader sh = cm->seq_params; - SequenceHeader *const seq_params = &sh; - int num_bits_width = aom_rb_read_literal(rb, 4) + 1; - int num_bits_height = aom_rb_read_literal(rb, 4) + 1; - int max_frame_width = aom_rb_read_literal(rb, num_bits_width) + 1; - int max_frame_height = aom_rb_read_literal(rb, num_bits_height) + 1; +static void av1_read_temporal_point_info(AV1_COMMON *const cm, + struct aom_read_bit_buffer *rb) { + cm->frame_presentation_time = aom_rb_read_unsigned_literal( + rb, cm->buffer_model.frame_presentation_time_length); +} + +void av1_read_sequence_header(AV1_COMMON *cm, struct aom_read_bit_buffer *rb, + SequenceHeader *seq_params) { + const int num_bits_width = aom_rb_read_literal(rb, 4) + 1; + const int num_bits_height = aom_rb_read_literal(rb, 4) + 1; + const int max_frame_width = aom_rb_read_literal(rb, num_bits_width) + 1; + const int max_frame_height = aom_rb_read_literal(rb, num_bits_height) + 1; seq_params->num_bits_width = num_bits_width; seq_params->num_bits_height = num_bits_height; @@ -3452,7 +4225,6 @@ void read_sequence_header(AV1_COMMON *cm, struct aom_read_bit_buffer *rb) { seq_params->enable_superres = aom_rb_read_bit(rb); seq_params->enable_cdef = aom_rb_read_bit(rb); seq_params->enable_restoration = aom_rb_read_bit(rb); - cm->seq_params = *seq_params; } static int read_global_motion_params(WarpedMotionParams *params, @@ -3640,9 +4412,12 @@ static void show_existing_frame_reset(AV1Decoder *const pbi, *cm->fc = cm->frame_contexts[existing_frame_idx]; } +// On success, returns 0. On failure, calls aom_internal_error and does not +// return. static int read_uncompressed_header(AV1Decoder *pbi, struct aom_read_bit_buffer *rb) { AV1_COMMON *const cm = &pbi->common; + const SequenceHeader *const seq_params = &cm->seq_params; MACROBLOCKD *const xd = &pbi->mb; BufferPool *const pool = cm->buffer_pool; RefCntBuffer *const frame_bufs = pool->frame_bufs; @@ -3658,7 +4433,7 @@ static int read_uncompressed_header(AV1Decoder *pbi, // NOTE: By default all coded frames to be used as a reference cm->is_reference_frame = 1; - if (cm->seq_params.reduced_still_picture_hdr) { + if (seq_params->reduced_still_picture_hdr) { cm->show_existing_frame = 0; cm->show_frame = 1; cm->frame_type = KEY_FRAME; @@ -3671,12 +4446,12 @@ static int read_uncompressed_header(AV1Decoder *pbi, // Show an existing frame directly. const int existing_frame_idx = aom_rb_read_literal(rb, 3); const int frame_to_show = cm->ref_frame_map[existing_frame_idx]; - if (cm->seq_params.decoder_model_info_present_flag && + if (seq_params->decoder_model_info_present_flag && cm->timing_info.equal_picture_interval == 0) { - av1_read_tu_pts_info(cm, rb); + av1_read_temporal_point_info(cm, rb); } - if (cm->seq_params.frame_id_numbers_present_flag) { - int frame_id_length = cm->seq_params.frame_id_length; + if (seq_params->frame_id_numbers_present_flag) { + int frame_id_length = seq_params->frame_id_length; int display_frame_id = aom_rb_read_literal(rb, frame_id_length); /* Compare display_frame_id with ref_frame_id and check valid for * referencing */ @@ -3719,16 +4494,16 @@ static int read_uncompressed_header(AV1Decoder *pbi, cm->frame_type = (FRAME_TYPE)aom_rb_read_literal(rb, 2); // 2 bits cm->show_frame = aom_rb_read_bit(rb); - if (cm->seq_params.still_picture && + if (seq_params->still_picture && (cm->frame_type != KEY_FRAME || !cm->show_frame)) { aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME, "Still pictures must be coded as shown keyframes"); } cm->showable_frame = cm->frame_type != KEY_FRAME; if (cm->show_frame) { - if (cm->seq_params.decoder_model_info_present_flag && + if (seq_params->decoder_model_info_present_flag && cm->timing_info.equal_picture_interval == 0) - av1_read_tu_pts_info(cm, rb); + av1_read_temporal_point_info(cm, rb); } else { // See if this frame can be used as show_existing_frame in future cm->showable_frame = aom_rb_read_bit(rb); @@ -3742,17 +4517,17 @@ static int read_uncompressed_header(AV1Decoder *pbi, } cm->disable_cdf_update = aom_rb_read_bit(rb); - if (cm->seq_params.force_screen_content_tools == 2) { + if (seq_params->force_screen_content_tools == 2) { cm->allow_screen_content_tools = aom_rb_read_bit(rb); } else { - cm->allow_screen_content_tools = cm->seq_params.force_screen_content_tools; + cm->allow_screen_content_tools = seq_params->force_screen_content_tools; } if (cm->allow_screen_content_tools) { - if (cm->seq_params.force_integer_mv == 2) { + if (seq_params->force_integer_mv == 2) { cm->cur_frame_force_integer_mv = aom_rb_read_bit(rb); } else { - cm->cur_frame_force_integer_mv = cm->seq_params.force_integer_mv; + cm->cur_frame_force_integer_mv = seq_params->force_integer_mv; } } else { cm->cur_frame_force_integer_mv = 0; @@ -3763,10 +4538,10 @@ static int read_uncompressed_header(AV1Decoder *pbi, cm->allow_intrabc = 0; cm->primary_ref_frame = PRIMARY_REF_NONE; - if (!cm->seq_params.reduced_still_picture_hdr) { - if (cm->seq_params.frame_id_numbers_present_flag) { - int frame_id_length = cm->seq_params.frame_id_length; - int diff_len = cm->seq_params.delta_frame_id_length; + if (!seq_params->reduced_still_picture_hdr) { + if (seq_params->frame_id_numbers_present_flag) { + int frame_id_length = seq_params->frame_id_length; + int diff_len = seq_params->delta_frame_id_length; int prev_frame_id = 0; int have_prev_frame_id = !pbi->decoding_first_frame && !(cm->frame_type == KEY_FRAME && cm->show_frame); @@ -3811,7 +4586,7 @@ static int read_uncompressed_header(AV1Decoder *pbi, frame_is_sframe(cm) ? 1 : aom_rb_read_literal(rb, 1); cm->frame_offset = - aom_rb_read_literal(rb, cm->seq_params.order_hint_bits_minus_1 + 1); + aom_rb_read_literal(rb, seq_params->order_hint_bits_minus_1 + 1); cm->current_video_frame = cm->frame_offset; if (!cm->error_resilient_mode && !frame_is_intra_only(cm)) { @@ -3819,27 +4594,27 @@ static int read_uncompressed_header(AV1Decoder *pbi, } } - if (cm->seq_params.decoder_model_info_present_flag) { - cm->buffer_removal_delay_present = aom_rb_read_bit(rb); - if (cm->buffer_removal_delay_present) { + if (seq_params->decoder_model_info_present_flag) { + cm->buffer_removal_time_present = aom_rb_read_bit(rb); + if (cm->buffer_removal_time_present) { for (int op_num = 0; - op_num < cm->seq_params.operating_points_cnt_minus_1 + 1; op_num++) { + op_num < seq_params->operating_points_cnt_minus_1 + 1; op_num++) { if (cm->op_params[op_num].decoder_model_param_present_flag) { - if ((((cm->seq_params.operating_point_idc[op_num] >> + if ((((seq_params->operating_point_idc[op_num] >> cm->temporal_layer_id) & 0x1) && - ((cm->seq_params.operating_point_idc[op_num] >> + ((seq_params->operating_point_idc[op_num] >> (cm->spatial_layer_id + 8)) & 0x1)) || - cm->seq_params.operating_point_idc[op_num] == 0) { - cm->op_frame_timing[op_num].buffer_removal_delay = - aom_rb_read_literal( - rb, cm->buffer_model.buffer_removal_delay_length); + seq_params->operating_point_idc[op_num] == 0) { + cm->op_frame_timing[op_num].buffer_removal_time = + aom_rb_read_unsigned_literal( + rb, cm->buffer_model.buffer_removal_time_length); } else { - cm->op_frame_timing[op_num].buffer_removal_delay = 0; + cm->op_frame_timing[op_num].buffer_removal_time = 0; } } else { - cm->op_frame_timing[op_num].buffer_removal_delay = 0; + cm->op_frame_timing[op_num].buffer_removal_time = 0; } } } @@ -3882,11 +4657,11 @@ static int read_uncompressed_header(AV1Decoder *pbi, if (!frame_is_intra_only(cm) || pbi->refresh_frame_flags != 0xFF) { // Read all ref frame order hints if error_resilient_mode == 1 - if (cm->error_resilient_mode && cm->seq_params.enable_order_hint) { + if (cm->error_resilient_mode && seq_params->enable_order_hint) { for (int ref_idx = 0; ref_idx < REF_FRAMES; ref_idx++) { // Read order hint from bit stream unsigned int frame_offset = - aom_rb_read_literal(rb, cm->seq_params.order_hint_bits_minus_1 + 1); + aom_rb_read_literal(rb, seq_params->order_hint_bits_minus_1 + 1); // Get buffer index int buf_idx = cm->ref_frame_map[ref_idx]; assert(buf_idx < FRAME_BUFFERS); @@ -3906,10 +4681,10 @@ static int read_uncompressed_header(AV1Decoder *pbi, } lock_buffer_pool(pool); if (aom_realloc_frame_buffer( - &frame_bufs[buf_idx].buf, cm->seq_params.max_frame_width, - cm->seq_params.max_frame_height, cm->subsampling_x, - cm->subsampling_y, cm->use_highbitdepth, AOM_BORDER_IN_PIXELS, - cm->byte_alignment, + &frame_bufs[buf_idx].buf, seq_params->max_frame_width, + seq_params->max_frame_height, seq_params->subsampling_x, + seq_params->subsampling_y, seq_params->use_highbitdepth, + AOM_BORDER_IN_PIXELS, cm->byte_alignment, &pool->frame_bufs[buf_idx].raw_frame_buffer, pool->get_fb_cb, pool->cb_priv)) { unlock_buffer_pool(pool); @@ -3917,7 +4692,7 @@ static int read_uncompressed_header(AV1Decoder *pbi, "Failed to allocate frame buffer"); } unlock_buffer_pool(pool); - set_planes_to_neutral_grey(cm, &frame_bufs[buf_idx].buf, 0); + set_planes_to_neutral_grey(seq_params, &frame_bufs[buf_idx].buf, 0); cm->ref_frame_map[ref_idx] = buf_idx; frame_bufs[buf_idx].cur_frame_offset = frame_offset; @@ -3937,7 +4712,8 @@ static int read_uncompressed_header(AV1Decoder *pbi, cm->allow_ref_frame_mvs = 0; if (cm->intra_only) { - cm->cur_frame->film_grain_params_present = cm->film_grain_params_present; + cm->cur_frame->film_grain_params_present = + seq_params->film_grain_params_present; setup_frame_size(cm, frame_size_override_flag, rb); if (cm->allow_screen_content_tools && !av1_superres_scaled(cm)) cm->allow_intrabc = aom_rb_read_bit(rb); @@ -3945,7 +4721,7 @@ static int read_uncompressed_header(AV1Decoder *pbi, } else if (pbi->need_resync != 1) { /* Skip if need resync */ // Frame refs short signaling is off when error resilient mode is on. - if (cm->seq_params.enable_order_hint) + if (seq_params->enable_order_hint) cm->frame_refs_short_signaling = aom_rb_read_bit(rb); if (cm->frame_refs_short_signaling) { @@ -3999,9 +4775,9 @@ static int read_uncompressed_header(AV1Decoder *pbi, cm->ref_frame_sign_bias[LAST_FRAME + i] = 0; - if (cm->seq_params.frame_id_numbers_present_flag) { - int frame_id_length = cm->seq_params.frame_id_length; - int diff_len = cm->seq_params.delta_frame_id_length; + if (seq_params->frame_id_numbers_present_flag) { + int frame_id_length = seq_params->frame_id_length; + int diff_len = seq_params->delta_frame_id_length; int delta_frame_id_minus_1 = aom_rb_read_literal(rb, diff_len); int ref_frame_id = ((cm->current_frame_id - (delta_frame_id_minus_1 + 1) + @@ -4064,7 +4840,7 @@ static int read_uncompressed_header(AV1Decoder *pbi, cm->cur_frame->intra_only = cm->frame_type == KEY_FRAME || cm->intra_only; cm->cur_frame->frame_type = cm->frame_type; - if (cm->seq_params.frame_id_numbers_present_flag) { + if (seq_params->frame_id_numbers_present_flag) { /* If bitmask is set, update reference frame id values and mark frames as valid for reference */ int refresh_frame_flags = pbi->refresh_frame_flags; @@ -4077,7 +4853,7 @@ static int read_uncompressed_header(AV1Decoder *pbi, } const int might_bwd_adapt = - !(cm->seq_params.reduced_still_picture_hdr) && !(cm->disable_cdf_update); + !(seq_params->reduced_still_picture_hdr) && !(cm->disable_cdf_update); if (might_bwd_adapt) { cm->refresh_frame_context = aom_rb_read_bit(rb) ? REFRESH_FRAME_CONTEXT_DISABLED @@ -4086,14 +4862,16 @@ static int read_uncompressed_header(AV1Decoder *pbi, cm->refresh_frame_context = REFRESH_FRAME_CONTEXT_DISABLED; } - get_frame_new_buffer(cm)->bit_depth = cm->bit_depth; - get_frame_new_buffer(cm)->color_primaries = cm->color_primaries; + get_frame_new_buffer(cm)->bit_depth = seq_params->bit_depth; + get_frame_new_buffer(cm)->color_primaries = seq_params->color_primaries; get_frame_new_buffer(cm)->transfer_characteristics = - cm->transfer_characteristics; - get_frame_new_buffer(cm)->matrix_coefficients = cm->matrix_coefficients; - get_frame_new_buffer(cm)->monochrome = cm->seq_params.monochrome; - get_frame_new_buffer(cm)->chroma_sample_position = cm->chroma_sample_position; - get_frame_new_buffer(cm)->color_range = cm->color_range; + seq_params->transfer_characteristics; + get_frame_new_buffer(cm)->matrix_coefficients = + seq_params->matrix_coefficients; + get_frame_new_buffer(cm)->monochrome = seq_params->monochrome; + get_frame_new_buffer(cm)->chroma_sample_position = + seq_params->chroma_sample_position; + get_frame_new_buffer(cm)->color_range = seq_params->color_range; get_frame_new_buffer(cm)->render_width = cm->render_width; get_frame_new_buffer(cm)->render_height = cm->render_height; @@ -4145,7 +4923,7 @@ static int read_uncompressed_header(AV1Decoder *pbi, read_tile_info(pbi, rb); setup_quantization(cm, rb); - xd->bd = (int)cm->bit_depth; + xd->bd = (int)seq_params->bit_depth; if (cm->num_allocated_above_context_planes < av1_num_planes(cm) || cm->num_allocated_above_context_mi_col < cm->mi_cols || @@ -4196,22 +4974,22 @@ static int read_uncompressed_header(AV1Decoder *pbi, cm->lf.filter_level[0] = 0; cm->lf.filter_level[1] = 0; } - if (cm->coded_lossless || !cm->seq_params.enable_cdef) { + if (cm->coded_lossless || !seq_params->enable_cdef) { cm->cdef_bits = 0; cm->cdef_strengths[0] = 0; cm->cdef_uv_strengths[0] = 0; } - if (cm->all_lossless || !cm->seq_params.enable_restoration) { + if (cm->all_lossless || !seq_params->enable_restoration) { cm->rst_info[0].frame_restoration_type = RESTORE_NONE; cm->rst_info[1].frame_restoration_type = RESTORE_NONE; cm->rst_info[2].frame_restoration_type = RESTORE_NONE; } setup_loopfilter(cm, rb); - if (!cm->coded_lossless && cm->seq_params.enable_cdef) { + if (!cm->coded_lossless && seq_params->enable_cdef) { setup_cdef(cm, rb); } - if (!cm->all_lossless && cm->seq_params.enable_restoration) { + if (!cm->all_lossless && seq_params->enable_restoration) { decode_restoration_mode(cm, rb); } @@ -4236,7 +5014,8 @@ static int read_uncompressed_header(AV1Decoder *pbi, if (!frame_is_intra_only(cm)) read_global_motion(cm, rb); - cm->cur_frame->film_grain_params_present = cm->film_grain_params_present; + cm->cur_frame->film_grain_params_present = + seq_params->film_grain_params_present; read_film_grain(cm, rb); #if EXT_TILE_DEBUG @@ -4282,11 +5061,11 @@ void superres_post_decode(AV1Decoder *pbi) { unlock_buffer_pool(pool); } -int av1_decode_frame_headers_and_setup(AV1Decoder *pbi, - struct aom_read_bit_buffer *rb, - const uint8_t *data, - const uint8_t **p_data_end, - int trailing_bits_present) { +uint32_t av1_decode_frame_headers_and_setup(AV1Decoder *pbi, + struct aom_read_bit_buffer *rb, + const uint8_t *data, + const uint8_t **p_data_end, + int trailing_bits_present) { AV1_COMMON *const cm = &pbi->common; const int num_planes = av1_num_planes(cm); MACROBLOCKD *const xd = &pbi->mb; @@ -4316,7 +5095,8 @@ int av1_decode_frame_headers_and_setup(AV1Decoder *pbi, pbi->dec_tile_col = -1; } - pbi->uncomp_hdr_size = aom_rb_bytes_read(rb); + const uint32_t uncomp_hdr_size = + (uint32_t)aom_rb_bytes_read(rb); // Size of the uncompressed header YV12_BUFFER_CONFIG *new_fb = get_frame_new_buffer(cm); xd->cur_buf = new_fb; if (av1_allow_intrabc(cm)) { @@ -4327,7 +5107,7 @@ int av1_decode_frame_headers_and_setup(AV1Decoder *pbi, if (cm->show_existing_frame) { // showing a frame directly - *p_data_end = data + aom_rb_bytes_read(rb); + *p_data_end = data + uncomp_hdr_size; if (cm->reset_decoder_state) { // Use the default frame context values. *cm->fc = cm->frame_contexts[FRAME_CONTEXT_DEFAULTS]; @@ -4335,7 +5115,7 @@ int av1_decode_frame_headers_and_setup(AV1Decoder *pbi, aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME, "Uninitialized entropy context."); } - return 0; + return uncomp_hdr_size; } cm->setup_mi(cm); @@ -4344,7 +5124,8 @@ int av1_decode_frame_headers_and_setup(AV1Decoder *pbi, av1_setup_motion_field(cm); - av1_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y, num_planes); + av1_setup_block_planes(xd, cm->seq_params.subsampling_x, + cm->seq_params.subsampling_y, num_planes); if (cm->primary_ref_frame == PRIMARY_REF_NONE) { // use the default frame context values *cm->fc = cm->frame_contexts[FRAME_CONTEXT_DEFAULTS]; @@ -4356,7 +5137,7 @@ int av1_decode_frame_headers_and_setup(AV1Decoder *pbi, "Uninitialized entropy context."); xd->corrupted = 0; - return 0; + return uncomp_hdr_size; } // Once-per-frame initialization @@ -4368,7 +5149,7 @@ static void setup_frame_info(AV1Decoder *pbi) { cm->rst_info[2].frame_restoration_type != RESTORE_NONE) { av1_alloc_restoration_buffers(cm); } - const int use_highbd = cm->use_highbitdepth ? 1 : 0; + const int use_highbd = cm->seq_params.use_highbitdepth ? 1 : 0; const int buf_size = MC_TEMP_BUF_PELS << use_highbd; if (pbi->td.mc_buf_size != buf_size) { av1_free_mc_tmp_buf(&pbi->td, use_highbd); @@ -4386,14 +5167,21 @@ void av1_decode_tg_tiles_and_wrapup(AV1Decoder *pbi, const uint8_t *data, if (initialize_flag) setup_frame_info(pbi); - if (pbi->max_threads > 1 && tile_count_tg > 1 && !cm->large_scale_tile) + if (pbi->max_threads > 1 && !(cm->large_scale_tile && !pbi->ext_tile_debug) && + pbi->row_mt) + *p_data_end = + decode_tiles_row_mt(pbi, data, data_end, start_tile, end_tile); + else if (pbi->max_threads > 1 && tile_count_tg > 1 && + !(cm->large_scale_tile && !pbi->ext_tile_debug)) *p_data_end = decode_tiles_mt(pbi, data, data_end, start_tile, end_tile); else *p_data_end = decode_tiles(pbi, data, data_end, start_tile, end_tile); const int num_planes = av1_num_planes(cm); // If the bit stream is monochrome, set the U and V buffers to a constant. - if (num_planes < 3) set_planes_to_neutral_grey(cm, xd->cur_buf, 1); + if (num_planes < 3) { + set_planes_to_neutral_grey(&cm->seq_params, xd->cur_buf, 1); + } if (end_tile != cm->tile_rows * cm->tile_cols - 1) { return; diff --git a/third_party/aom/av1/decoder/decodeframe.h b/third_party/aom/av1/decoder/decodeframe.h index 330cedcdc..d289b31f2 100644 --- a/third_party/aom/av1/decoder/decodeframe.h +++ b/third_party/aom/av1/decoder/decodeframe.h @@ -18,12 +18,13 @@ extern "C" { struct AV1Decoder; struct aom_read_bit_buffer; +struct ThreadData; // Reads the middle part of the sequence header OBU (from -// frame_width_bits_minus_1 to enable_restoration) into cm->seq_params (a -// SequenceHeader). Reports errors by calling rb->error_handler() or -// aom_internal_error(). -void read_sequence_header(AV1_COMMON *cm, struct aom_read_bit_buffer *rb); +// frame_width_bits_minus_1 to enable_restoration) into seq_params. +// Reports errors by calling rb->error_handler() or aom_internal_error(). +void av1_read_sequence_header(AV1_COMMON *cm, struct aom_read_bit_buffer *rb, + SequenceHeader *seq_params); void av1_read_frame_size(struct aom_read_bit_buffer *rb, int num_bits_width, int num_bits_height, int *width, int *height); @@ -34,11 +35,14 @@ BITSTREAM_PROFILE av1_read_profile(struct aom_read_bit_buffer *rb); int av1_check_trailing_bits(struct AV1Decoder *pbi, struct aom_read_bit_buffer *rb); -int av1_decode_frame_headers_and_setup(struct AV1Decoder *pbi, - struct aom_read_bit_buffer *rb, - const uint8_t *data, - const uint8_t **p_data_end, - int trailing_bits_present); +// On success, returns the frame header size. On failure, calls +// aom_internal_error and does not return. +// TODO(wtc): Figure out and document the p_data_end parameter. +uint32_t av1_decode_frame_headers_and_setup(struct AV1Decoder *pbi, + struct aom_read_bit_buffer *rb, + const uint8_t *data, + const uint8_t **p_data_end, + int trailing_bits_present); void av1_decode_tg_tiles_and_wrapup(struct AV1Decoder *pbi, const uint8_t *data, const uint8_t *data_end, @@ -47,8 +51,9 @@ void av1_decode_tg_tiles_and_wrapup(struct AV1Decoder *pbi, const uint8_t *data, // Implements the color_config() function in the spec. Reports errors by // calling rb->error_handler() or aom_internal_error(). -void av1_read_color_config(AV1_COMMON *cm, struct aom_read_bit_buffer *rb, - int allow_lowbitdepth); +void av1_read_color_config(struct aom_read_bit_buffer *rb, + int allow_lowbitdepth, SequenceHeader *seq_params, + struct aom_internal_error_info *error_info); // Implements the timing_info() function in the spec. Reports errors by calling // rb->error_handler(). @@ -69,7 +74,7 @@ struct aom_read_bit_buffer *av1_init_read_bit_buffer( struct AV1Decoder *pbi, struct aom_read_bit_buffer *rb, const uint8_t *data, const uint8_t *data_end); -void av1_free_mc_tmp_buf(void *td, int use_highbd); +void av1_free_mc_tmp_buf(struct ThreadData *thread_data, int use_highbd); void av1_set_single_tile_decoding_mode(AV1_COMMON *const cm); diff --git a/third_party/aom/av1/decoder/decodemv.c b/third_party/aom/av1/decoder/decodemv.c index cc8f4d29e..5e920b18d 100644 --- a/third_party/aom/av1/decoder/decodemv.c +++ b/third_party/aom/av1/decoder/decodemv.c @@ -290,7 +290,7 @@ static int read_segment_id(AV1_COMMON *const cm, const MACROBLOCKD *const xd, av1_neg_deinterleave(coded_id, pred, seg->last_active_segid + 1); if (segment_id < 0 || segment_id > seg->last_active_segid) { - aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME, + aom_internal_error(xd->error_info, AOM_CODEC_CORRUPT_FRAME, "Corrupted segment_ids"); } return segment_id; @@ -573,7 +573,7 @@ static void read_palette_mode_info(AV1_COMMON *const cm, MACROBLOCKD *const xd, aom_read_symbol(r, xd->tile_ctx->palette_y_size_cdf[bsize_ctx], PALETTE_SIZES, ACCT_STR) + 2; - read_palette_colors_y(xd, cm->bit_depth, pmi, r); + read_palette_colors_y(xd, cm->seq_params.bit_depth, pmi, r); } } if (num_planes > 1 && mbmi->uv_mode == UV_DC_PRED && @@ -587,7 +587,7 @@ static void read_palette_mode_info(AV1_COMMON *const cm, MACROBLOCKD *const xd, aom_read_symbol(r, xd->tile_ctx->palette_uv_size_cdf[bsize_ctx], PALETTE_SIZES, ACCT_STR) + 2; - read_palette_colors_uv(xd, cm->bit_depth, pmi, r); + read_palette_colors_uv(xd, cm->seq_params.bit_depth, pmi, r); } } } @@ -1299,7 +1299,7 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi, } if (is_compound != is_inter_compound_mode(mbmi->mode)) { - aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME, + aom_internal_error(xd->error_info, AOM_CODEC_CORRUPT_FRAME, "Prediction mode %d invalid with ref frame %d %d", mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]); } @@ -1480,8 +1480,9 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi, } } - xd->cfl.is_chroma_reference = is_chroma_reference( - mi_row, mi_col, bsize, cm->subsampling_x, cm->subsampling_y); + xd->cfl.is_chroma_reference = + is_chroma_reference(mi_row, mi_col, bsize, cm->seq_params.subsampling_x, + cm->seq_params.subsampling_y); xd->cfl.store_y = store_cfl_required(cm, xd); #if DEC_MISMATCH_DEBUG diff --git a/third_party/aom/av1/decoder/decoder.c b/third_party/aom/av1/decoder/decoder.c index 2e91d27d3..e978fad6c 100644 --- a/third_party/aom/av1/decoder/decoder.c +++ b/third_party/aom/av1/decoder/decoder.c @@ -71,6 +71,7 @@ static void dec_free_mi(AV1_COMMON *cm) { cm->mip = NULL; aom_free(cm->mi_grid_base); cm->mi_grid_base = NULL; + cm->mi_alloc_size = 0; } AV1Decoder *av1_decoder_create(BufferPool *const pool) { @@ -81,6 +82,9 @@ AV1Decoder *av1_decoder_create(BufferPool *const pool) { av1_zero(*pbi); + // The jmp_buf is valid only for the duration of the function that calls + // setjmp(). Therefore, this function must reset the 'setjmp' field to 0 + // before it returns. if (setjmp(cm->error.jmp)) { cm->error.setjmp = 0; av1_decoder_remove(pbi); @@ -98,7 +102,7 @@ AV1Decoder *av1_decoder_create(BufferPool *const pool) { memset(cm->frame_contexts, 0, FRAME_CONTEXTS * sizeof(*cm->frame_contexts)); pbi->need_resync = 1; - once(initialize_dec); + aom_once(initialize_dec); // Initialize the references to not point to any frame buffers. memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); @@ -108,7 +112,7 @@ AV1Decoder *av1_decoder_create(BufferPool *const pool) { pbi->decoding_first_frame = 1; pbi->common.buffer_pool = pool; - cm->bit_depth = AOM_BITS_8; + cm->seq_params.bit_depth = AOM_BITS_8; cm->dequant_bit_depth = AOM_BITS_8; cm->alloc_mi = av1_dec_alloc_mi; @@ -146,6 +150,12 @@ void av1_dealloc_dec_jobs(struct AV1DecTileMTData *tile_mt_info) { } } +void av1_dec_free_cb_buf(AV1Decoder *pbi) { + aom_free(pbi->cb_buffer_base); + pbi->cb_buffer_base = NULL; + pbi->cb_buffer_alloc_size = 0; +} + void av1_decoder_remove(AV1Decoder *pbi) { int i; @@ -161,7 +171,7 @@ void av1_decoder_remove(AV1Decoder *pbi) { if (pbi->thread_data) { for (int worker_idx = 0; worker_idx < pbi->max_threads - 1; worker_idx++) { DecWorkerData *const thread_data = pbi->thread_data + worker_idx; - const int use_highbd = pbi->common.use_highbitdepth ? 1 : 0; + const int use_highbd = pbi->common.seq_params.use_highbitdepth ? 1 : 0; av1_free_mc_tmp_buf(thread_data->td, use_highbd); aom_free(thread_data->td); } @@ -172,6 +182,20 @@ void av1_decoder_remove(AV1Decoder *pbi) { AVxWorker *const worker = &pbi->tile_workers[i]; aom_get_worker_interface()->end(worker); } +#if CONFIG_MULTITHREAD + if (pbi->row_mt_mutex_ != NULL) { + pthread_mutex_destroy(pbi->row_mt_mutex_); + aom_free(pbi->row_mt_mutex_); + } + if (pbi->row_mt_cond_ != NULL) { + pthread_cond_destroy(pbi->row_mt_cond_); + aom_free(pbi->row_mt_cond_); + } +#endif + for (i = 0; i < pbi->allocated_tiles; i++) { + TileDataDec *const tile_data = pbi->tile_data + i; + av1_dec_row_mt_dealloc(&tile_data->dec_row_mt_sync); + } aom_free(pbi->tile_data); aom_free(pbi->tile_workers); @@ -181,10 +205,11 @@ void av1_decoder_remove(AV1Decoder *pbi) { av1_dealloc_dec_jobs(&pbi->tile_mt_info); } + av1_dec_free_cb_buf(pbi); #if CONFIG_ACCOUNTING aom_accounting_clear(&pbi->accounting); #endif - const int use_highbd = pbi->common.use_highbitdepth ? 1 : 0; + const int use_highbd = pbi->common.seq_params.use_highbitdepth ? 1 : 0; av1_free_mc_tmp_buf(&pbi->td, use_highbd); aom_free(pbi); @@ -279,7 +304,7 @@ aom_codec_err_t av1_set_reference_dec(AV1_COMMON *cm, int idx, ref_buf->y_buffer = sd->y_buffer; ref_buf->u_buffer = sd->u_buffer; ref_buf->v_buffer = sd->v_buffer; - ref_buf->use_external_refernce_buffers = 1; + ref_buf->use_external_reference_buffers = 1; } } @@ -414,7 +439,10 @@ int av1_receive_compressed_data(AV1Decoder *pbi, size_t size, // Find a free frame buffer. Return error if can not find any. cm->new_fb_idx = get_free_fb(cm); - if (cm->new_fb_idx == INVALID_IDX) return AOM_CODEC_MEM_ERROR; + if (cm->new_fb_idx == INVALID_IDX) { + cm->error.error_code = AOM_CODEC_MEM_ERROR; + return 1; + } // Assign a MV array to the frame buffer. cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx]; @@ -423,6 +451,9 @@ int av1_receive_compressed_data(AV1Decoder *pbi, size_t size, pbi->cur_buf = &frame_bufs[cm->new_fb_idx]; + // The jmp_buf is valid only for the duration of the function that calls + // setjmp(). Therefore, this function must reset the 'setjmp' field to 0 + // before it returns. if (setjmp(cm->error.jmp)) { const AVxWorkerInterface *const winterface = aom_get_worker_interface(); int i; @@ -474,7 +505,13 @@ int av1_receive_compressed_data(AV1Decoder *pbi, size_t size, int frame_decoded = aom_decode_frame_from_obus(pbi, source, source + size, psource); - if (cm->error.error_code != AOM_CODEC_OK) return 1; + if (cm->error.error_code != AOM_CODEC_OK) { + lock_buffer_pool(pool); + decrease_ref_count(cm->new_fb_idx, frame_bufs, pool); + unlock_buffer_pool(pool); + cm->error.setjmp = 0; + return 1; + } #if TXCOEFF_TIMER cm->cum_txcoeff_timer += cm->txcoeff_timer; @@ -493,7 +530,10 @@ int av1_receive_compressed_data(AV1Decoder *pbi, size_t size, pbi->decoding_first_frame = 0; } - if (cm->error.error_code != AOM_CODEC_OK) return 1; + if (cm->error.error_code != AOM_CODEC_OK) { + cm->error.setjmp = 0; + return 1; + } aom_clear_system_state(); diff --git a/third_party/aom/av1/decoder/decoder.h b/third_party/aom/av1/decoder/decoder.h index 42fcc1256..610b98d95 100644 --- a/third_party/aom/av1/decoder/decoder.h +++ b/third_party/aom/av1/decoder/decoder.h @@ -33,6 +33,20 @@ extern "C" { #endif +typedef void (*decode_block_visitor_fn_t)(const AV1_COMMON *const cm, + MACROBLOCKD *const xd, + aom_reader *const r, const int plane, + const int row, const int col, + const TX_SIZE tx_size); + +typedef void (*predict_inter_block_visitor_fn_t)(AV1_COMMON *const cm, + MACROBLOCKD *const xd, + int mi_row, int mi_col, + BLOCK_SIZE bsize); + +typedef void (*cfl_store_inter_block_visitor_fn_t)(AV1_COMMON *const cm, + MACROBLOCKD *const xd); + typedef struct ThreadData { aom_reader *bit_reader; DECLARE_ALIGNED(32, MACROBLOCKD, xd); @@ -41,12 +55,54 @@ typedef struct ThreadData { CB_BUFFER cb_buffer_base; uint8_t *mc_buf[2]; int32_t mc_buf_size; + + decode_block_visitor_fn_t read_coeffs_tx_intra_block_visit; + decode_block_visitor_fn_t predict_and_recon_intra_block_visit; + decode_block_visitor_fn_t read_coeffs_tx_inter_block_visit; + decode_block_visitor_fn_t inverse_tx_inter_block_visit; + predict_inter_block_visitor_fn_t predict_inter_block_visit; + cfl_store_inter_block_visitor_fn_t cfl_store_inter_block_visit; } ThreadData; +typedef struct AV1DecRowMTJobInfo { + int tile_row; + int tile_col; + int mi_row; +} AV1DecRowMTJobInfo; + +typedef struct AV1DecRowMTSyncData { +#if CONFIG_MULTITHREAD + pthread_mutex_t *mutex_; + pthread_cond_t *cond_; +#endif + int allocated_sb_rows; + int *cur_sb_col; + int sync_range; + int mi_rows; + int mi_cols; + int mi_rows_parse_done; + int mi_rows_decode_started; + int num_threads_working; +} AV1DecRowMTSync; + +typedef struct AV1DecRowMTInfo { + int tile_rows_start; + int tile_rows_end; + int tile_cols_start; + int tile_cols_end; + int start_tile; + int end_tile; + int mi_rows_parse_done; + int mi_rows_decode_started; + int mi_rows_to_decode; + int row_mt_exit; +} AV1DecRowMTInfo; + typedef struct TileDataDec { TileInfo tile_info; aom_reader bit_reader; DECLARE_ALIGNED(16, FRAME_CONTEXT, tctx); + AV1DecRowMTSync dec_row_mt_sync; } TileDataDec; typedef struct TileBufferDec { @@ -139,9 +195,8 @@ typedef struct AV1Decoder { int acct_enabled; Accounting accounting; #endif - size_t uncomp_hdr_size; // Size of the uncompressed header - int tg_size; // Number of tiles in the current tilegroup - int tg_start; // First tile in the current tilegroup + int tg_size; // Number of tiles in the current tilegroup + int tg_start; // First tile in the current tilegroup int tg_size_bit_offset; int sequence_header_ready; #if CONFIG_INSPECTION @@ -162,12 +217,27 @@ typedef struct AV1Decoder { int tile_count_minus_1; uint32_t coded_tile_data_size; unsigned int ext_tile_debug; // for ext-tile software debug & testing + unsigned int row_mt; EXTERNAL_REFERENCES ext_refs; size_t tile_list_size; uint8_t *tile_list_output; size_t buffer_sz; + + CB_BUFFER *cb_buffer_base; + int cb_buffer_alloc_size; + + int allocated_row_mt_sync_rows; + +#if CONFIG_MULTITHREAD + pthread_mutex_t *row_mt_mutex_; + pthread_cond_t *row_mt_cond_; +#endif + + AV1DecRowMTInfo frame_row_mt_info; } AV1Decoder; +// Returns 0 on success. Sets pbi->common.error.error_code to a nonzero error +// code and returns a nonzero value on failure. int av1_receive_compressed_data(struct AV1Decoder *pbi, size_t size, const uint8_t **dest); @@ -192,6 +262,10 @@ struct AV1Decoder *av1_decoder_create(BufferPool *const pool); void av1_decoder_remove(struct AV1Decoder *pbi); void av1_dealloc_dec_jobs(struct AV1DecTileMTData *tile_jobs_sync); +void av1_dec_row_mt_dealloc(AV1DecRowMTSync *dec_row_mt_sync); + +void av1_dec_free_cb_buf(AV1Decoder *pbi); + static INLINE void decrease_ref_count(int idx, RefCntBuffer *const frame_bufs, BufferPool *const pool) { if (idx >= 0) { @@ -207,18 +281,6 @@ static INLINE void decrease_ref_count(int idx, RefCntBuffer *const frame_bufs, } } -static INLINE int dec_is_ref_frame_buf(AV1Decoder *const pbi, - RefCntBuffer *frame_buf) { - AV1_COMMON *const cm = &pbi->common; - int i; - for (i = 0; i < INTER_REFS_PER_FRAME; ++i) { - RefBuffer *const ref_frame = &cm->frame_refs[i]; - if (ref_frame->idx == INVALID_IDX) continue; - if (frame_buf == &cm->buffer_pool->frame_bufs[ref_frame->idx]) break; - } - return (i < INTER_REFS_PER_FRAME); -} - #define ACCT_STR __func__ static INLINE int av1_read_uniform(aom_reader *r, int n) { const int l = get_unsigned_bits(n); @@ -238,6 +300,10 @@ void av1_visit_palette(AV1Decoder *const pbi, MACROBLOCKD *const xd, int mi_row, int mi_col, aom_reader *r, BLOCK_SIZE bsize, palette_visitor_fn_t visit); +typedef void (*block_visitor_fn_t)(AV1Decoder *const pbi, ThreadData *const td, + int mi_row, int mi_col, aom_reader *r, + PARTITION_TYPE partition, BLOCK_SIZE bsize); + #ifdef __cplusplus } // extern "C" #endif diff --git a/third_party/aom/av1/decoder/decodetxb.c b/third_party/aom/av1/decoder/decodetxb.c index f9a3e8578..f3ef2d55e 100644 --- a/third_party/aom/av1/decoder/decodetxb.c +++ b/third_party/aom/av1/decoder/decodetxb.c @@ -320,10 +320,14 @@ uint8_t av1_read_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *const xd, return cul_level; } -uint8_t av1_read_coeffs_txb_facade(const AV1_COMMON *const cm, - MACROBLOCKD *const xd, aom_reader *const r, - const int row, const int col, - const int plane, const TX_SIZE tx_size) { +void av1_read_coeffs_txb_facade(const AV1_COMMON *const cm, + MACROBLOCKD *const xd, aom_reader *const r, + const int plane, const int row, const int col, + const TX_SIZE tx_size) { +#if TXCOEFF_TIMER + struct aom_usec_timer timer; + aom_usec_timer_start(&timer); +#endif MB_MODE_INFO *const mbmi = xd->mi[0]; struct macroblockd_plane *const pd = &xd->plane[plane]; @@ -337,5 +341,22 @@ uint8_t av1_read_coeffs_txb_facade(const AV1_COMMON *const cm, const uint8_t cul_level = av1_read_coeffs_txb(cm, xd, r, row, col, plane, &txb_ctx, tx_size); av1_set_contexts(xd, pd, plane, plane_bsize, tx_size, cul_level, col, row); - return cul_level; + + if (is_inter_block(mbmi)) { + PLANE_TYPE plane_type = get_plane_type(plane); + // tx_type will be read out in av1_read_coeffs_txb_facade + const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, row, col, tx_size, + cm->reduced_tx_set_used); + + if (plane == 0) + update_txk_array(mbmi->txk_type, mbmi->sb_type, row, col, tx_size, + tx_type); + } + +#if TXCOEFF_TIMER + aom_usec_timer_mark(&timer); + const int64_t elapsed_time = aom_usec_timer_elapsed(&timer); + cm->txcoeff_timer += elapsed_time; + ++cm->txb_count; +#endif } diff --git a/third_party/aom/av1/decoder/decodetxb.h b/third_party/aom/av1/decoder/decodetxb.h index d0b3d8c7a..687bba958 100644 --- a/third_party/aom/av1/decoder/decodetxb.h +++ b/third_party/aom/av1/decoder/decodetxb.h @@ -25,8 +25,8 @@ uint8_t av1_read_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *const xd, const TXB_CTX *const txb_ctx, const TX_SIZE tx_size); -uint8_t av1_read_coeffs_txb_facade(const AV1_COMMON *const cm, - MACROBLOCKD *const xd, aom_reader *const r, - const int row, const int col, - const int plane, const TX_SIZE tx_size); +void av1_read_coeffs_txb_facade(const AV1_COMMON *const cm, + MACROBLOCKD *const xd, aom_reader *const r, + const int plane, const int row, const int col, + const TX_SIZE tx_size); #endif // DECODETXB_H_ diff --git a/third_party/aom/av1/decoder/dthread.c b/third_party/aom/av1/decoder/dthread.c index ff03502e6..3946c787a 100644 --- a/third_party/aom/av1/decoder/dthread.c +++ b/third_party/aom/av1/decoder/dthread.c @@ -157,8 +157,8 @@ void av1_frameworker_copy_context(AVxWorker *const dst_worker, dst_worker_data->pbi->need_resync = src_worker_data->pbi->need_resync; av1_frameworker_unlock_stats(src_worker); - dst_cm->bit_depth = src_cm->bit_depth; - dst_cm->use_highbitdepth = src_cm->use_highbitdepth; + dst_cm->seq_params.bit_depth = src_cm->seq_params.bit_depth; + dst_cm->seq_params.use_highbitdepth = src_cm->seq_params.use_highbitdepth; // TODO(zoeliu): To handle parallel decoding dst_cm->prev_frame = src_cm->show_existing_frame ? src_cm->prev_frame : src_cm->cur_frame; @@ -166,8 +166,8 @@ void av1_frameworker_copy_context(AVxWorker *const dst_worker, !src_cm->show_existing_frame ? src_cm->width : src_cm->last_width; dst_cm->last_height = !src_cm->show_existing_frame ? src_cm->height : src_cm->last_height; - dst_cm->subsampling_x = src_cm->subsampling_x; - dst_cm->subsampling_y = src_cm->subsampling_y; + dst_cm->seq_params.subsampling_x = src_cm->seq_params.subsampling_x; + dst_cm->seq_params.subsampling_y = src_cm->seq_params.subsampling_y; dst_cm->frame_type = src_cm->frame_type; dst_cm->last_show_frame = !src_cm->show_existing_frame ? src_cm->show_frame diff --git a/third_party/aom/av1/decoder/dthread.h b/third_party/aom/av1/decoder/dthread.h index 33d89006e..9f854e015 100644 --- a/third_party/aom/av1/decoder/dthread.h +++ b/third_party/aom/av1/decoder/dthread.h @@ -39,7 +39,6 @@ typedef struct FrameWorkerData { const uint8_t *data_end; size_t data_size; void *user_priv; - int result; int worker_id; int received_frame; diff --git a/third_party/aom/av1/decoder/obu.c b/third_party/aom/av1/decoder/obu.c index 482b6415e..715bc6837 100644 --- a/third_party/aom/av1/decoder/obu.c +++ b/third_party/aom/av1/decoder/obu.c @@ -161,6 +161,17 @@ static int is_obu_in_current_operating_point(AV1Decoder *pbi, return 0; } +static int byte_alignment(AV1_COMMON *const cm, + struct aom_read_bit_buffer *const rb) { + while (rb->bit_offset & 7) { + if (aom_rb_read_bit(rb)) { + cm->error.error_code = AOM_CODEC_CORRUPT_FRAME; + return -1; + } + } + return 0; +} + static uint32_t read_temporal_delimiter_obu() { return 0; } // Returns a boolean that indicates success. @@ -173,6 +184,13 @@ static int read_bitstream_level(BitstreamLevel *bl, return 1; } +// Returns whether two sequence headers are consistent with each other. +// TODO(huisu,wtc@google.com): make sure the code matches the spec exactly. +static int are_seq_headers_consistent(const SequenceHeader *seq_params_old, + const SequenceHeader *seq_params_new) { + return !memcmp(seq_params_old, seq_params_new, sizeof(SequenceHeader)); +} + // On success, sets pbi->sequence_header_ready to 1 and returns the number of // bytes read from 'rb'. // On failure, sets pbi->common.error.error_code and returns 0. @@ -184,14 +202,17 @@ static uint32_t read_sequence_header_obu(AV1Decoder *pbi, // Verify rb has been configured to report errors. assert(rb->error_handler); - cm->profile = av1_read_profile(rb); - if (cm->profile > PROFILE_2) { + // Use a local variable to store the information as we decode. At the end, + // if no errors have occurred, cm->seq_params is updated. + SequenceHeader sh = cm->seq_params; + SequenceHeader *const seq_params = &sh; + + seq_params->profile = av1_read_profile(rb); + if (seq_params->profile > PROFILE_2) { cm->error.error_code = AOM_CODEC_UNSUP_BITSTREAM; return 0; } - SequenceHeader *const seq_params = &cm->seq_params; - // Still picture or not seq_params->still_picture = aom_rb_read_bit(rb); seq_params->reduced_still_picture_hdr = aom_rb_read_bit(rb); @@ -252,7 +273,8 @@ static uint32_t read_sequence_header_obu(AV1Decoder *pbi, (cm->timing_info.equal_picture_interval || cm->op_params[i].decoder_model_param_present_flag)) { cm->op_params[i].bitrate = max_level_bitrate( - cm->profile, major_minor_to_seq_level_idx(seq_params->level[i]), + seq_params->profile, + major_minor_to_seq_level_idx(seq_params->level[i]), seq_params->tier[i]); // Level with seq_level_idx = 31 returns a high "dummy" bitrate to pass // the check @@ -305,30 +327,49 @@ static uint32_t read_sequence_header_obu(AV1Decoder *pbi, return 0; } - read_sequence_header(cm, rb); + av1_read_sequence_header(cm, rb, seq_params); - av1_read_color_config(cm, rb, pbi->allow_lowbitdepth); + av1_read_color_config(rb, pbi->allow_lowbitdepth, seq_params, &cm->error); + if (!(seq_params->subsampling_x == 0 && seq_params->subsampling_y == 0) && + !(seq_params->subsampling_x == 1 && seq_params->subsampling_y == 1) && + !(seq_params->subsampling_x == 1 && seq_params->subsampling_y == 0)) { + aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM, + "Only 4:4:4, 4:2:2 and 4:2:0 are currently supported, " + "%d %d subsampling is not supported.\n", + seq_params->subsampling_x, seq_params->subsampling_y); + } - cm->film_grain_params_present = aom_rb_read_bit(rb); + seq_params->film_grain_params_present = aom_rb_read_bit(rb); if (av1_check_trailing_bits(pbi, rb) != 0) { // cm->error.error_code is already set. return 0; } + // If a sequence header has been decoded before, we check if the new + // one is consistent with the old one. + if (pbi->sequence_header_ready) { + if (!are_seq_headers_consistent(&cm->seq_params, seq_params)) { + aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM, + "Inconsistent sequence headers received."); + } + } + + cm->seq_params = *seq_params; pbi->sequence_header_ready = 1; return ((rb->bit_offset - saved_bit_offset + 7) >> 3); } +// On success, returns the frame header size. On failure, calls +// aom_internal_error and does not return. static uint32_t read_frame_header_obu(AV1Decoder *pbi, struct aom_read_bit_buffer *rb, const uint8_t *data, const uint8_t **p_data_end, int trailing_bits_present) { - av1_decode_frame_headers_and_setup(pbi, rb, data, p_data_end, - trailing_bits_present); - return (uint32_t)(pbi->uncomp_hdr_size); + return av1_decode_frame_headers_and_setup(pbi, rb, data, p_data_end, + trailing_bits_present); } static int32_t read_tile_group_header(AV1Decoder *pbi, @@ -353,7 +394,6 @@ static int32_t read_tile_group_header(AV1Decoder *pbi, aom_internal_error( &cm->error, AOM_CODEC_UNSUP_BITSTREAM, "For OBU_FRAME type obu tile_start_and_end_present_flag must be 0"); - cm->error.error_code = AOM_CODEC_CORRUPT_FRAME; return -1; } *start_tile = @@ -371,9 +411,12 @@ static uint32_t read_one_tile_group_obu( int start_tile, end_tile; int32_t header_size, tg_payload_size; + assert((rb->bit_offset & 7) == 0); + assert(rb->bit_buffer + aom_rb_bytes_read(rb) == data); + header_size = read_tile_group_header(pbi, rb, &start_tile, &end_tile, tile_start_implicit); - if (header_size == -1) return 0; + if (header_size == -1 || byte_alignment(cm, rb)) return 0; if (start_tile > end_tile) return header_size; data += header_size; av1_decode_tg_tiles_and_wrapup(pbi, data, data_end, p_data_end, start_tile, @@ -386,44 +429,22 @@ static uint32_t read_one_tile_group_obu( return header_size + tg_payload_size; } -// Only called while large_scale_tile = 1. -static uint32_t read_and_decode_one_tile_list(AV1Decoder *pbi, - struct aom_read_bit_buffer *rb, - const uint8_t *data, - const uint8_t *data_end, - const uint8_t **p_data_end, - int *frame_decoding_finished) { - AV1_COMMON *const cm = &pbi->common; - uint32_t tile_list_payload_size = 0; - const int num_tiles = cm->tile_cols * cm->tile_rows; - const int start_tile = 0; - const int end_tile = num_tiles - 1; - int i = 0; - - // Process the tile list info. - pbi->output_frame_width_in_tiles_minus_1 = aom_rb_read_literal(rb, 8); - pbi->output_frame_height_in_tiles_minus_1 = aom_rb_read_literal(rb, 8); - pbi->tile_count_minus_1 = aom_rb_read_literal(rb, 16); - if (pbi->tile_count_minus_1 > 511) { - cm->error.error_code = AOM_CODEC_CORRUPT_FRAME; - return 0; - } - - // Allocate output frame buffer for the tile list. +static void alloc_tile_list_buffer(AV1Decoder *pbi) { // TODO(yunqing): for now, copy each tile's decoded YUV data directly to the // output buffer. This needs to be modified according to the application // requirement. + AV1_COMMON *const cm = &pbi->common; const int tile_width_in_pixels = cm->tile_width * MI_SIZE; const int tile_height_in_pixels = cm->tile_height * MI_SIZE; - const int ssy = cm->subsampling_y; - const int ssx = cm->subsampling_x; + const int ssy = cm->seq_params.subsampling_y; + const int ssx = cm->seq_params.subsampling_x; const int num_planes = av1_num_planes(cm); const size_t yplane_tile_size = tile_height_in_pixels * tile_width_in_pixels; const size_t uvplane_tile_size = (num_planes > 1) ? (tile_height_in_pixels >> ssy) * (tile_width_in_pixels >> ssx) : 0; - const size_t tile_size = (cm->use_highbitdepth ? 2 : 1) * + const size_t tile_size = (cm->seq_params.use_highbitdepth ? 2 : 1) * (yplane_tile_size + 2 * uvplane_tile_size); pbi->tile_list_size = tile_size * (pbi->tile_count_minus_1 + 1); @@ -437,6 +458,83 @@ static uint32_t read_and_decode_one_tile_list(AV1Decoder *pbi, "Failed to allocate the tile list output buffer"); pbi->buffer_sz = pbi->tile_list_size; } +} + +static void copy_decoded_tile_to_tile_list_buffer(AV1Decoder *pbi, + uint8_t **output) { + AV1_COMMON *const cm = &pbi->common; + const int tile_width_in_pixels = cm->tile_width * MI_SIZE; + const int tile_height_in_pixels = cm->tile_height * MI_SIZE; + const int ssy = cm->seq_params.subsampling_y; + const int ssx = cm->seq_params.subsampling_x; + const int num_planes = av1_num_planes(cm); + + // Copy decoded tile to the tile list output buffer. + YV12_BUFFER_CONFIG *cur_frame = get_frame_new_buffer(cm); + const int mi_row = pbi->dec_tile_row * cm->tile_height; + const int mi_col = pbi->dec_tile_col * cm->tile_width; + const int is_hbd = (cur_frame->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0; + uint8_t *bufs[MAX_MB_PLANE] = { NULL, NULL, NULL }; + int strides[MAX_MB_PLANE] = { 0, 0, 0 }; + int plane; + + for (plane = 0; plane < num_planes; ++plane) { + int shift_x = plane > 0 ? ssx : 0; + int shift_y = plane > 0 ? ssy : 0; + + bufs[plane] = cur_frame->buffers[plane]; + strides[plane] = + (plane > 0) ? cur_frame->strides[1] : cur_frame->strides[0]; + + bufs[plane] += mi_row * (MI_SIZE >> shift_y) * strides[plane] + + mi_col * (MI_SIZE >> shift_x); + + if (is_hbd) { + bufs[plane] = (uint8_t *)CONVERT_TO_SHORTPTR(bufs[plane]); + strides[plane] *= 2; + } + + int w, h; + w = (plane > 0 && shift_x > 0) ? ((tile_width_in_pixels + 1) >> shift_x) + : tile_width_in_pixels; + w *= (1 + is_hbd); + h = (plane > 0 && shift_y > 0) ? ((tile_height_in_pixels + 1) >> shift_y) + : tile_height_in_pixels; + int j; + + for (j = 0; j < h; ++j) { + memcpy(*output, bufs[plane], w); + bufs[plane] += strides[plane]; + *output += w; + } + } +} + +// Only called while large_scale_tile = 1. +static uint32_t read_and_decode_one_tile_list(AV1Decoder *pbi, + struct aom_read_bit_buffer *rb, + const uint8_t *data, + const uint8_t *data_end, + const uint8_t **p_data_end, + int *frame_decoding_finished) { + AV1_COMMON *const cm = &pbi->common; + uint32_t tile_list_payload_size = 0; + const int num_tiles = cm->tile_cols * cm->tile_rows; + const int start_tile = 0; + const int end_tile = num_tiles - 1; + int i = 0; + + // Process the tile list info. + pbi->output_frame_width_in_tiles_minus_1 = aom_rb_read_literal(rb, 8); + pbi->output_frame_height_in_tiles_minus_1 = aom_rb_read_literal(rb, 8); + pbi->tile_count_minus_1 = aom_rb_read_literal(rb, 16); + if (pbi->tile_count_minus_1 > MAX_TILES - 1) { + cm->error.error_code = AOM_CODEC_CORRUPT_FRAME; + return 0; + } + + // Allocate output frame buffer for the tile list. + alloc_tile_list_buffer(pbi); uint32_t tile_list_info_bytes = 4; tile_list_payload_size += tile_list_info_bytes; @@ -485,45 +583,8 @@ static uint32_t read_and_decode_one_tile_list(AV1Decoder *pbi, data = *p_data_end; assert(data <= data_end); - // Copy decoded tile to the tile list output buffer. - YV12_BUFFER_CONFIG *cur_frame = get_frame_new_buffer(cm); - const int mi_row = pbi->dec_tile_row * cm->tile_height; - const int mi_col = pbi->dec_tile_col * cm->tile_width; - const int is_hbd = (cur_frame->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0; - uint8_t *bufs[MAX_MB_PLANE] = { NULL, NULL, NULL }; - int strides[MAX_MB_PLANE] = { 0, 0, 0 }; - int plane; - - for (plane = 0; plane < num_planes; ++plane) { - int shift_x = plane > 0 ? ssx : 0; - int shift_y = plane > 0 ? ssy : 0; - - bufs[plane] = cur_frame->buffers[plane]; - strides[plane] = - (plane > 0) ? cur_frame->strides[1] : cur_frame->strides[0]; - if (is_hbd) { - bufs[plane] = (uint8_t *)CONVERT_TO_SHORTPTR(cur_frame->buffers[plane]); - strides[plane] = - (plane > 0) ? 2 * cur_frame->strides[1] : 2 * cur_frame->strides[0]; - } - - bufs[plane] += mi_row * (MI_SIZE >> shift_y) * strides[plane] + - mi_col * (MI_SIZE >> shift_x); - - int w, h; - w = (plane > 0 && shift_x > 0) ? ((tile_width_in_pixels + 1) >> shift_x) - : tile_width_in_pixels; - w *= (1 + is_hbd); - h = (plane > 0 && shift_y > 0) ? ((tile_height_in_pixels + 1) >> shift_y) - : tile_height_in_pixels; - int j; - - for (j = 0; j < h; ++j) { - memcpy(output, bufs[plane], w); - bufs[plane] += strides[plane]; - output += w; - } - } + // Copy the decoded tile to the tile list output buffer. + copy_decoded_tile_to_tile_list_buffer(pbi, &output); } *frame_decoding_finished = 1; @@ -710,7 +771,6 @@ aom_codec_err_t aom_read_obu_header_and_size(const uint8_t *data, return AOM_CODEC_OK; } -#define EXT_TILE_DEBUG 0 // On success, returns a boolean that indicates whether the decoding of the // current frame is finished. On failure, sets cm->error.error_code and // returns -1. @@ -720,7 +780,7 @@ int aom_decode_frame_from_obus(struct AV1Decoder *pbi, const uint8_t *data, AV1_COMMON *const cm = &pbi->common; int frame_decoding_finished = 0; int is_first_tg_obu_received = 1; - int frame_header_size = 0; + uint32_t frame_header_size = 0; int seq_header_received = 0; size_t seq_header_size = 0; ObuHeader obu_header; @@ -785,7 +845,7 @@ int aom_decode_frame_from_obus(struct AV1Decoder *pbi, const uint8_t *data, } } - av1_init_read_bit_buffer(pbi, &rb, data, data_end); + av1_init_read_bit_buffer(pbi, &rb, data, data + payload_size); switch (obu_header.type) { case OBU_TEMPORAL_DELIMITER: @@ -813,21 +873,35 @@ int aom_decode_frame_from_obus(struct AV1Decoder *pbi, const uint8_t *data, // Only decode first frame header received if (!pbi->seen_frame_header || (cm->large_scale_tile && !pbi->camera_frame_header_ready)) { - pbi->seen_frame_header = 1; frame_header_size = read_frame_header_obu( pbi, &rb, data, p_data_end, obu_header.type != OBU_FRAME); - if (cm->large_scale_tile) pbi->camera_frame_header_ready = 1; + pbi->seen_frame_header = 1; + if (!pbi->ext_tile_debug && cm->large_scale_tile) + pbi->camera_frame_header_ready = 1; + } else { + // TODO(wtc): Verify that the frame_header_obu is identical to the + // original frame_header_obu. For now just skip frame_header_size + // bytes in the bit buffer. + if (frame_header_size > payload_size) { + cm->error.error_code = AOM_CODEC_CORRUPT_FRAME; + return -1; + } + assert(rb.bit_offset == 0); + rb.bit_offset = 8 * frame_header_size; } decoded_payload_size = frame_header_size; - pbi->frame_header_size = (size_t)frame_header_size; + pbi->frame_header_size = frame_header_size; if (cm->show_existing_frame) { + if (obu_header.type == OBU_FRAME) { + cm->error.error_code = AOM_CODEC_UNSUP_BITSTREAM; + return -1; + } frame_decoding_finished = 1; pbi->seen_frame_header = 0; break; } -#if !EXT_TILE_DEBUG // In large scale tile coding, decode the common camera frame header // before any tile list OBU. if (!pbi->ext_tile_debug && pbi->camera_frame_header_ready) { @@ -838,17 +912,18 @@ int aom_decode_frame_from_obus(struct AV1Decoder *pbi, const uint8_t *data, *p_data_end = data_end; break; } -#endif // EXT_TILE_DEBUG if (obu_header.type != OBU_FRAME) break; obu_payload_offset = frame_header_size; + // Byte align the reader before reading the tile group. + if (byte_alignment(cm, &rb)) return -1; AOM_FALLTHROUGH_INTENDED; // fall through to read tile group. case OBU_TILE_GROUP: if (!pbi->seen_frame_header) { cm->error.error_code = AOM_CODEC_CORRUPT_FRAME; return -1; } - if ((size_t)(data_end - data) < obu_payload_offset) { + if (obu_payload_offset > payload_size) { cm->error.error_code = AOM_CODEC_CORRUPT_FRAME; return -1; } @@ -904,4 +979,3 @@ int aom_decode_frame_from_obus(struct AV1Decoder *pbi, const uint8_t *data, return frame_decoding_finished; } -#undef EXT_TILE_DEBUG |