summaryrefslogtreecommitdiffstats
path: root/third_party/aom/av1/encoder/encodeframe.c
diff options
context:
space:
mode:
authortrav90 <travawine@palemoon.org>2018-10-19 23:05:00 -0500
committertrav90 <travawine@palemoon.org>2018-10-19 23:05:03 -0500
commitd2499ead93dc4298c0882fe98902acb1b5209f99 (patch)
treecb0b942aed59e5108f9a3e9d64e7b77854383421 /third_party/aom/av1/encoder/encodeframe.c
parent41fbdea457bf50c0a43e1c27c5cbf7f0a3a9eb33 (diff)
downloadUXP-d2499ead93dc4298c0882fe98902acb1b5209f99.tar
UXP-d2499ead93dc4298c0882fe98902acb1b5209f99.tar.gz
UXP-d2499ead93dc4298c0882fe98902acb1b5209f99.tar.lz
UXP-d2499ead93dc4298c0882fe98902acb1b5209f99.tar.xz
UXP-d2499ead93dc4298c0882fe98902acb1b5209f99.zip
Update libaom to commit ID 1e227d41f0616de9548a673a83a21ef990b62591
Diffstat (limited to 'third_party/aom/av1/encoder/encodeframe.c')
-rw-r--r--third_party/aom/av1/encoder/encodeframe.c1075
1 files changed, 697 insertions, 378 deletions
diff --git a/third_party/aom/av1/encoder/encodeframe.c b/third_party/aom/av1/encoder/encodeframe.c
index 27ca53761..cb226c59e 100644
--- a/third_party/aom/av1/encoder/encodeframe.c
+++ b/third_party/aom/av1/encoder/encodeframe.c
@@ -40,11 +40,11 @@
#include "av1/common/reconinter.h"
#include "av1/common/seg_common.h"
#include "av1/common/tile_common.h"
+#include "av1/common/warped_motion.h"
#include "av1/encoder/aq_complexity.h"
#include "av1/encoder/aq_cyclicrefresh.h"
#include "av1/encoder/aq_variance.h"
-#include "av1/common/warped_motion.h"
#include "av1/encoder/global_motion.h"
#include "av1/encoder/encodeframe.h"
#include "av1/encoder/encodemb.h"
@@ -56,6 +56,7 @@
#include "av1/encoder/partition_model_weights.h"
#include "av1/encoder/rd.h"
#include "av1/encoder/rdopt.h"
+#include "av1/encoder/reconinter_enc.h"
#include "av1/encoder/segmentation.h"
#include "av1/encoder/tokenize.h"
@@ -348,8 +349,9 @@ static void reset_tx_size(MACROBLOCK *x, MB_MODE_INFO *mbmi,
x->skip = 0;
}
-static void update_state(const AV1_COMP *const cpi, TileDataEnc *tile_data,
- ThreadData *td, PICK_MODE_CONTEXT *ctx, int mi_row,
+static void update_state(const AV1_COMP *const cpi,
+ const TileDataEnc *const tile_data, ThreadData *td,
+ const PICK_MODE_CONTEXT *const ctx, int mi_row,
int mi_col, BLOCK_SIZE bsize, RUN_TYPE dry_run) {
int i, x_idx, y;
const AV1_COMMON *const cm = &cpi->common;
@@ -359,7 +361,7 @@ static void update_state(const AV1_COMP *const cpi, TileDataEnc *tile_data,
MACROBLOCKD *const xd = &x->e_mbd;
struct macroblock_plane *const p = x->plane;
struct macroblockd_plane *const pd = xd->plane;
- MB_MODE_INFO *mi = &ctx->mic;
+ const MB_MODE_INFO *const mi = &ctx->mic;
MB_MODE_INFO *const mi_addr = xd->mi[0];
const struct segmentation *const seg = &cm->seg;
const int bw = mi_size_wide[mi->sb_type];
@@ -505,12 +507,12 @@ static int set_deltaq_rdmult(const AV1_COMP *const cpi, MACROBLOCKD *const xd) {
cpi, cm->base_qindex + xd->delta_qindex + cm->y_dc_delta_q);
}
-static void rd_pick_sb_modes(const AV1_COMP *const cpi, TileDataEnc *tile_data,
+static void rd_pick_sb_modes(AV1_COMP *const cpi, TileDataEnc *tile_data,
MACROBLOCK *const x, int mi_row, int mi_col,
RD_STATS *rd_cost, PARTITION_TYPE partition,
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
int64_t best_rd) {
- const AV1_COMMON *const cm = &cpi->common;
+ AV1_COMMON *const cm = &cpi->common;
const int num_planes = av1_num_planes(cm);
TileInfo *const tile_info = &tile_data->tile_info;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -522,6 +524,13 @@ static void rd_pick_sb_modes(const AV1_COMP *const cpi, TileDataEnc *tile_data,
const DELTAQ_MODE deltaq_mode = cpi->oxcf.deltaq_mode;
int i, orig_rdmult;
+ if (best_rd < 0) {
+ ctx->rdcost = INT64_MAX;
+ ctx->skip = 0;
+ av1_invalid_rd_stats(rd_cost);
+ return;
+ }
+
aom_clear_system_state();
set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
@@ -588,9 +597,10 @@ static void rd_pick_sb_modes(const AV1_COMP *const cpi, TileDataEnc *tile_data,
if (aq_mode == VARIANCE_AQ) {
if (cpi->vaq_refresh) {
- const int energy =
- bsize <= BLOCK_16X16 ? x->mb_energy : av1_block_energy(cpi, x, bsize);
- mbmi->segment_id = av1_vaq_segment_id(energy);
+ const int energy = bsize <= BLOCK_16X16
+ ? x->mb_energy
+ : av1_log_block_var(cpi, x, bsize);
+ mbmi->segment_id = energy;
}
x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id);
} else if (aq_mode == COMPLEXITY_AQ) {
@@ -1407,8 +1417,8 @@ static void save_context(const MACROBLOCK *x, RD_SEARCH_MACROBLOCK_CONTEXT *ctx,
static void encode_b(const AV1_COMP *const cpi, TileDataEnc *tile_data,
ThreadData *td, TOKENEXTRA **tp, int mi_row, int mi_col,
RUN_TYPE dry_run, BLOCK_SIZE bsize,
- PARTITION_TYPE partition, PICK_MODE_CONTEXT *ctx,
- int *rate) {
+ PARTITION_TYPE partition,
+ const PICK_MODE_CONTEXT *const ctx, int *rate) {
TileInfo *const tile = &tile_data->tile_info;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *xd = &x->e_mbd;
@@ -1691,7 +1701,7 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td,
if (bsize == BLOCK_16X16 && cpi->vaq_refresh) {
set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
- x->mb_energy = av1_block_energy(cpi, x, bsize);
+ x->mb_energy = av1_log_block_var(cpi, x, bsize);
}
if (do_partition_search &&
@@ -1728,7 +1738,20 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td,
pc_tree->partitioning = partition;
}
}
-
+ for (int b = 0; b < 2; ++b) {
+ pc_tree->horizontal[b].skip_ref_frame_mask = 0;
+ pc_tree->vertical[b].skip_ref_frame_mask = 0;
+ }
+ for (int b = 0; b < 3; ++b) {
+ pc_tree->horizontala[b].skip_ref_frame_mask = 0;
+ pc_tree->horizontalb[b].skip_ref_frame_mask = 0;
+ pc_tree->verticala[b].skip_ref_frame_mask = 0;
+ pc_tree->verticalb[b].skip_ref_frame_mask = 0;
+ }
+ for (int b = 0; b < 4; ++b) {
+ pc_tree->horizontal4[b].skip_ref_frame_mask = 0;
+ pc_tree->vertical4[b].skip_ref_frame_mask = 0;
+ }
switch (partition) {
case PARTITION_NONE:
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
@@ -1741,7 +1764,7 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td,
if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
mi_row + hbs < cm->mi_rows) {
RD_STATS tmp_rdc;
- PICK_MODE_CONTEXT *ctx_h = &pc_tree->horizontal[0];
+ const PICK_MODE_CONTEXT *const ctx_h = &pc_tree->horizontal[0];
av1_init_rd_stats(&tmp_rdc);
update_state(cpi, tile_data, td, ctx_h, mi_row, mi_col, subsize, 1);
encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row,
@@ -1765,7 +1788,7 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td,
if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
mi_col + hbs < cm->mi_cols) {
RD_STATS tmp_rdc;
- PICK_MODE_CONTEXT *ctx_v = &pc_tree->vertical[0];
+ const PICK_MODE_CONTEXT *const ctx_v = &pc_tree->vertical[0];
av1_init_rd_stats(&tmp_rdc);
update_state(cpi, tile_data, td, ctx_v, mi_row, mi_col, subsize, 1);
encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row,
@@ -1812,7 +1835,8 @@ static void rd_use_partition(AV1_COMP *cpi, ThreadData *td,
case PARTITION_HORZ_A:
case PARTITION_HORZ_B:
case PARTITION_HORZ_4:
- case PARTITION_VERT_4: assert(0 && "Cannot handle extended partiton types");
+ case PARTITION_VERT_4:
+ assert(0 && "Cannot handle extended partition types");
default: assert(0); break;
}
@@ -2164,7 +2188,8 @@ static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv));
}
-static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
+static INLINE void load_pred_mv(MACROBLOCK *x,
+ const PICK_MODE_CONTEXT *const ctx) {
memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv));
}
@@ -2221,12 +2246,11 @@ static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv,
// Try searching for an encoding for the given subblock. Returns zero if the
// rdcost is already too high (to tell the caller not to bother searching for
// encodings of further subblocks)
-static int rd_try_subblock(const AV1_COMP *const cpi, ThreadData *td,
- TileDataEnc *tile_data, TOKENEXTRA **tp,
- int is_first, int is_last, int mi_row, int mi_col,
- BLOCK_SIZE subsize, RD_STATS *best_rdc,
- RD_STATS *sum_rdc, RD_STATS *this_rdc,
- PARTITION_TYPE partition,
+static int rd_try_subblock(AV1_COMP *const cpi, ThreadData *td,
+ TileDataEnc *tile_data, TOKENEXTRA **tp, int is_last,
+ int mi_row, int mi_col, BLOCK_SIZE subsize,
+ RD_STATS *best_rdc, RD_STATS *sum_rdc,
+ RD_STATS *this_rdc, PARTITION_TYPE partition,
PICK_MODE_CONTEXT *prev_ctx,
PICK_MODE_CONTEXT *this_ctx) {
#define RTS_X_RATE_NOCOEF_ARG
@@ -2236,25 +2260,20 @@ static int rd_try_subblock(const AV1_COMP *const cpi, ThreadData *td,
if (cpi->sf.adaptive_motion_search) load_pred_mv(x, prev_ctx);
- // On the first time around, write the rd stats straight to sum_rdc. Also, we
- // should treat sum_rdc as containing zeros (even if it doesn't) to avoid
- // having to zero it at the start.
- if (is_first) this_rdc = sum_rdc;
- const int64_t spent_rdcost = is_first ? 0 : sum_rdc->rdcost;
- const int64_t rdcost_remaining = best_rdc->rdcost - spent_rdcost;
+ const int64_t rdcost_remaining = best_rdc->rdcost == INT64_MAX
+ ? INT64_MAX
+ : (best_rdc->rdcost - sum_rdc->rdcost);
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, this_rdc,
RTS_X_RATE_NOCOEF_ARG partition, subsize, this_ctx,
rdcost_remaining);
- if (!is_first) {
- if (this_rdc->rate == INT_MAX) {
- sum_rdc->rdcost = INT64_MAX;
- } else {
- sum_rdc->rate += this_rdc->rate;
- sum_rdc->dist += this_rdc->dist;
- sum_rdc->rdcost += this_rdc->rdcost;
- }
+ if (this_rdc->rate == INT_MAX) {
+ sum_rdc->rdcost = INT64_MAX;
+ } else {
+ sum_rdc->rate += this_rdc->rate;
+ sum_rdc->dist += this_rdc->dist;
+ sum_rdc->rdcost += this_rdc->rdcost;
}
if (sum_rdc->rdcost >= RTS_MAX_RDCOST) return 0;
@@ -2271,7 +2290,7 @@ static int rd_try_subblock(const AV1_COMP *const cpi, ThreadData *td,
#undef RTS_MAX_RDCOST
}
-static void rd_test_partition3(const AV1_COMP *const cpi, ThreadData *td,
+static void rd_test_partition3(AV1_COMP *const cpi, ThreadData *td,
TileDataEnc *tile_data, TOKENEXTRA **tp,
PC_TREE *pc_tree, RD_STATS *best_rdc,
PICK_MODE_CONTEXT ctxs[3],
@@ -2284,13 +2303,16 @@ static void rd_test_partition3(const AV1_COMP *const cpi, ThreadData *td,
MACROBLOCKD *const xd = &x->e_mbd;
RD_STATS sum_rdc, this_rdc;
#define RTP_STX_TRY_ARGS
-
- if (!rd_try_subblock(cpi, td, tile_data, tp, 1, 0, mi_row0, mi_col0, subsize0,
+ int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
+ av1_init_rd_stats(&sum_rdc);
+ sum_rdc.rate = x->partition_cost[pl][partition];
+ sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
+ if (!rd_try_subblock(cpi, td, tile_data, tp, 0, mi_row0, mi_col0, subsize0,
best_rdc, &sum_rdc, &this_rdc,
RTP_STX_TRY_ARGS partition, ctx, &ctxs[0]))
return;
- if (!rd_try_subblock(cpi, td, tile_data, tp, 0, 0, mi_row1, mi_col1, subsize1,
+ if (!rd_try_subblock(cpi, td, tile_data, tp, 0, mi_row1, mi_col1, subsize1,
best_rdc, &sum_rdc, &this_rdc,
RTP_STX_TRY_ARGS partition, &ctxs[0], &ctxs[1]))
return;
@@ -2302,15 +2324,13 @@ static void rd_test_partition3(const AV1_COMP *const cpi, ThreadData *td,
// difference (obviously) doesn't contribute to the error.
const int try_block2 = 1;
if (try_block2 &&
- !rd_try_subblock(cpi, td, tile_data, tp, 0, 1, mi_row2, mi_col2, subsize2,
+ !rd_try_subblock(cpi, td, tile_data, tp, 1, mi_row2, mi_col2, subsize2,
best_rdc, &sum_rdc, &this_rdc,
RTP_STX_TRY_ARGS partition, &ctxs[1], &ctxs[2]))
return;
if (sum_rdc.rdcost >= best_rdc->rdcost) return;
- int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
- sum_rdc.rate += x->partition_cost[pl][partition];
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
if (sum_rdc.rdcost >= best_rdc->rdcost) return;
@@ -2321,45 +2341,6 @@ static void rd_test_partition3(const AV1_COMP *const cpi, ThreadData *td,
#undef RTP_STX_TRY_ARGS
}
-#if CONFIG_DIST_8X8
-static int64_t dist_8x8_yuv(const AV1_COMP *const cpi, MACROBLOCK *const x,
- uint8_t *src_plane_8x8[MAX_MB_PLANE],
- uint8_t *dst_plane_8x8[MAX_MB_PLANE]) {
- const AV1_COMMON *const cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
- MACROBLOCKD *const xd = &x->e_mbd;
- int64_t dist_8x8, dist_8x8_uv, total_dist;
- const int src_stride = x->plane[0].src.stride;
- int plane;
-
- const int dst_stride = xd->plane[0].dst.stride;
- dist_8x8 =
- av1_dist_8x8(cpi, x, src_plane_8x8[0], src_stride, dst_plane_8x8[0],
- dst_stride, BLOCK_8X8, 8, 8, 8, 8, x->qindex)
- << 4;
-
- // Compute chroma distortion for a luma 8x8 block
- dist_8x8_uv = 0;
-
- if (num_planes > 1) {
- for (plane = 1; plane < MAX_MB_PLANE; ++plane) {
- unsigned sse;
- const int src_stride_uv = x->plane[plane].src.stride;
- const int dst_stride_uv = xd->plane[plane].dst.stride;
- const int ssx = xd->plane[plane].subsampling_x;
- const int ssy = xd->plane[plane].subsampling_y;
- const BLOCK_SIZE plane_bsize = get_plane_block_size(BLOCK_8X8, ssx, ssy);
-
- cpi->fn_ptr[plane_bsize].vf(src_plane_8x8[plane], src_stride_uv,
- dst_plane_8x8[plane], dst_stride_uv, &sse);
- dist_8x8_uv += (int64_t)sse << 4;
- }
- }
-
- return total_dist = dist_8x8 + dist_8x8_uv;
-}
-#endif // CONFIG_DIST_8X8
-
static void reset_partition(PC_TREE *pc_tree, BLOCK_SIZE bsize) {
pc_tree->partitioning = PARTITION_NONE;
pc_tree->cb_search_range = SEARCH_FULL_PLANE;
@@ -2372,7 +2353,7 @@ static void reset_partition(PC_TREE *pc_tree, BLOCK_SIZE bsize) {
}
}
-static void rd_pick_sqr_partition(const AV1_COMP *const cpi, ThreadData *td,
+static void rd_pick_sqr_partition(AV1_COMP *const cpi, ThreadData *td,
TileDataEnc *tile_data, TOKENEXTRA **tp,
int mi_row, int mi_col, BLOCK_SIZE bsize,
RD_STATS *rd_cost, int64_t best_rd,
@@ -2410,7 +2391,12 @@ static void rd_pick_sqr_partition(const AV1_COMP *const cpi, ThreadData *td,
(void)*tp_orig;
(void)split_rd;
- av1_zero(pc_tree->pc_tree_stats);
+ if (best_rd < 0) {
+ pc_tree->none.rdcost = INT64_MAX;
+ pc_tree->none.skip = 0;
+ av1_invalid_rd_stats(rd_cost);
+ return;
+ }
pc_tree->pc_tree_stats.valid = 1;
// Override partition costs at the edges of the frame in the same
@@ -2441,9 +2427,11 @@ static void rd_pick_sqr_partition(const AV1_COMP *const cpi, ThreadData *td,
#ifndef NDEBUG
// Nothing should rely on the default value of this array (which is just
- // leftover from encoding the previous block. Setting it to magic number
+ // leftover from encoding the previous block. Setting it to fixed pattern
// when debugging.
- memset(x->blk_skip, 234, sizeof(x->blk_skip));
+ // bit 0, 1, 2 are blk_skip of each plane
+ // bit 4, 5, 6 are initialization checking of each plane
+ memset(x->blk_skip, 0x77, sizeof(x->blk_skip));
#endif // NDEBUG
assert(mi_size_wide[bsize] == mi_size_high[bsize]);
@@ -2456,19 +2444,35 @@ static void rd_pick_sqr_partition(const AV1_COMP *const cpi, ThreadData *td,
set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
if (bsize == BLOCK_16X16 && cpi->vaq_refresh)
- x->mb_energy = av1_block_energy(cpi, x, bsize);
+ x->mb_energy = av1_log_block_var(cpi, x, bsize);
xd->above_txfm_context = cm->above_txfm_context[tile_info->tile_row] + mi_col;
xd->left_txfm_context =
xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
+#if CONFIG_DIST_8X8
+ if (x->using_dist_8x8) {
+ if (block_size_high[bsize] <= 8 || block_size_wide[bsize] <= 8)
+ do_square_split = 0;
+ }
+#endif
+
// PARTITION_NONE
if (partition_none_allowed) {
- if (bsize_at_least_8x8) pc_tree->partitioning = PARTITION_NONE;
-
+ int pt_cost = 0;
+ if (bsize_at_least_8x8) {
+ pc_tree->partitioning = PARTITION_NONE;
+ pt_cost = partition_cost[PARTITION_NONE] < INT_MAX
+ ? partition_cost[PARTITION_NONE]
+ : 0;
+ }
+ int64_t partition_rd_cost = RDCOST(x->rdmult, pt_cost, 0);
+ int64_t best_remain_rdcost = best_rdc.rdcost == INT64_MAX
+ ? INT64_MAX
+ : (best_rdc.rdcost - partition_rd_cost);
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
- PARTITION_NONE, bsize, ctx_none, best_rdc.rdcost);
+ PARTITION_NONE, bsize, ctx_none, best_remain_rdcost);
pc_tree->pc_tree_stats.rdcost = ctx_none->rdcost;
pc_tree->pc_tree_stats.skip = ctx_none->skip;
@@ -2476,9 +2480,6 @@ static void rd_pick_sqr_partition(const AV1_COMP *const cpi, ThreadData *td,
if (none_rd) *none_rd = this_rdc.rdcost;
if (this_rdc.rate != INT_MAX) {
if (bsize_at_least_8x8) {
- const int pt_cost = partition_cost[PARTITION_NONE] < INT_MAX
- ? partition_cost[PARTITION_NONE]
- : 0;
this_rdc.rate += pt_cost;
this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist);
}
@@ -2520,17 +2521,6 @@ static void rd_pick_sqr_partition(const AV1_COMP *const cpi, ThreadData *td,
int64_t temp_best_rdcost = best_rdc.rdcost;
pn_rdc = best_rdc;
-#if CONFIG_DIST_8X8
- uint8_t *src_plane_8x8[MAX_MB_PLANE], *dst_plane_8x8[MAX_MB_PLANE];
-
- if (x->using_dist_8x8 && bsize == BLOCK_8X8) {
- for (int i = 0; i < MAX_MB_PLANE; i++) {
- src_plane_8x8[i] = x->plane[i].src.buf;
- dst_plane_8x8[i] = xd->plane[i].dst.buf;
- }
- }
-#endif // CONFIG_DIST_8X8
-
// PARTITION_SPLIT
if (do_square_split) {
int reached_last_index = 0;
@@ -2548,6 +2538,8 @@ static void rd_pick_sqr_partition(const AV1_COMP *const cpi, ThreadData *td,
pc_tree->split[idx]->index = idx;
int64_t *p_split_rd = &split_rd[idx];
+ // TODO(Cherma) : Account for partition cost while passing best rd to
+ // rd_pick_sqr_partition()
rd_pick_sqr_partition(cpi, td, tile_data, tp, mi_row + y_idx,
mi_col + x_idx, subsize, &this_rdc,
temp_best_rdcost - sum_rdc.rdcost,
@@ -2568,14 +2560,6 @@ static void rd_pick_sqr_partition(const AV1_COMP *const cpi, ThreadData *td,
}
reached_last_index = (idx == 4);
-#if CONFIG_DIST_8X8
- if (x->using_dist_8x8 && reached_last_index &&
- sum_rdc.rdcost != INT64_MAX && bsize == BLOCK_8X8) {
- sum_rdc.dist = dist_8x8_yuv(cpi, x, src_plane_8x8, dst_plane_8x8);
- sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
- }
-#endif // CONFIG_DIST_8X8
-
if (reached_last_index && sum_rdc.rdcost < best_rdc.rdcost) {
sum_rdc.rate += partition_cost[PARTITION_SPLIT];
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
@@ -2634,14 +2618,6 @@ static void rd_pick_sqr_partition(const AV1_COMP *const cpi, ThreadData *td,
}
}
-#if CONFIG_DIST_8X8
- if (x->using_dist_8x8 && best_rdc.rate < INT_MAX &&
- best_rdc.dist < INT64_MAX && bsize == BLOCK_4X4 && pc_tree->index == 3) {
- encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
- pc_tree, NULL);
- }
-#endif // CONFIG_DIST_8X8
-
if (bsize == cm->seq_params.sb_size) {
assert(best_rdc.rate < INT_MAX);
assert(best_rdc.dist < INT64_MAX);
@@ -2791,6 +2767,99 @@ static int ml_prune_2pass_split_partition(const PC_TREE_STATS *pc_tree_stats,
}
#undef FEATURE_SIZE
+static void ml_prune_rect_partition(const AV1_COMP *const cpi,
+ const MACROBLOCK *const x, BLOCK_SIZE bsize,
+ int64_t best_rd, int64_t none_rd,
+ int64_t *split_rd,
+ int *const dst_prune_horz,
+ int *const dst_prune_vert) {
+ if (bsize < BLOCK_8X8 || best_rd >= 1000000000) return;
+ best_rd = AOMMAX(best_rd, 1);
+ const NN_CONFIG *nn_config = NULL;
+ const float prob_thresholds[5] = { 0.01f, 0.01f, 0.004f, 0.002f, 0.002f };
+ float cur_thresh = 0.0f;
+ switch (bsize) {
+ case BLOCK_8X8:
+ nn_config = &av1_rect_partition_nnconfig_8;
+ cur_thresh = prob_thresholds[0];
+ break;
+ case BLOCK_16X16:
+ nn_config = &av1_rect_partition_nnconfig_16;
+ cur_thresh = prob_thresholds[1];
+ break;
+ case BLOCK_32X32:
+ nn_config = &av1_rect_partition_nnconfig_32;
+ cur_thresh = prob_thresholds[2];
+ break;
+ case BLOCK_64X64:
+ nn_config = &av1_rect_partition_nnconfig_64;
+ cur_thresh = prob_thresholds[3];
+ break;
+ case BLOCK_128X128:
+ nn_config = &av1_rect_partition_nnconfig_128;
+ cur_thresh = prob_thresholds[4];
+ break;
+ default: assert(0 && "Unexpected bsize.");
+ }
+ if (!nn_config) return;
+ aom_clear_system_state();
+
+ // 1. Compute input features
+ float features[9];
+
+ // RD cost ratios
+ for (int i = 0; i < 5; i++) features[i] = 1.0f;
+ if (none_rd > 0 && none_rd < 1000000000)
+ features[0] = (float)none_rd / (float)best_rd;
+ for (int i = 0; i < 4; i++) {
+ if (split_rd[i] > 0 && split_rd[i] < 1000000000)
+ features[1 + i] = (float)split_rd[i] / (float)best_rd;
+ }
+
+ // Variance ratios
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ int whole_block_variance;
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ whole_block_variance = av1_high_get_sby_perpixel_variance(
+ cpi, &x->plane[0].src, bsize, xd->bd);
+ } else {
+ whole_block_variance =
+ av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
+ }
+ whole_block_variance = AOMMAX(whole_block_variance, 1);
+
+ int split_variance[4];
+ const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
+ struct buf_2d buf;
+ buf.stride = x->plane[0].src.stride;
+ const int bw = block_size_wide[bsize];
+ for (int i = 0; i < 4; ++i) {
+ const int x_idx = (i & 1) * bw / 2;
+ const int y_idx = (i >> 1) * bw / 2;
+ buf.buf = x->plane[0].src.buf + x_idx + y_idx * buf.stride;
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ split_variance[i] =
+ av1_high_get_sby_perpixel_variance(cpi, &buf, subsize, xd->bd);
+ } else {
+ split_variance[i] = av1_get_sby_perpixel_variance(cpi, &buf, subsize);
+ }
+ }
+
+ for (int i = 0; i < 4; i++)
+ features[5 + i] = (float)split_variance[i] / (float)whole_block_variance;
+
+ // 2. Do the prediction and prune 0-2 partitions based on their probabilities
+ float raw_scores[3] = { 0.0f };
+ av1_nn_predict(features, nn_config, raw_scores);
+ float probs[3] = { 0.0f };
+ av1_nn_softmax(raw_scores, probs, 3);
+
+ // probs[0] is the probability of the fact that both rectangular partitions
+ // are worse than current best_rd
+ if (probs[1] <= cur_thresh) (*dst_prune_horz) = 1;
+ if (probs[2] <= cur_thresh) (*dst_prune_vert) = 1;
+}
+
// Use a ML model to predict if horz_a, horz_b, vert_a, and vert_b should be
// considered.
static void ml_prune_ab_partition(BLOCK_SIZE bsize, int part_ctx, int var_ctx,
@@ -2880,13 +2949,14 @@ static void ml_prune_ab_partition(BLOCK_SIZE bsize, int part_ctx, int var_ctx,
#define FEATURES 18
#define LABELS 4
// Use a ML model to predict if horz4 and vert4 should be considered.
-static void ml_prune_4_partition(const AV1_COMP *const cpi,
- const MACROBLOCK *const x, BLOCK_SIZE bsize,
- int part_ctx, int64_t best_rd,
- int64_t horz_rd[2], int64_t vert_rd[2],
- int64_t split_rd[4],
+static void ml_prune_4_partition(const AV1_COMP *const cpi, MACROBLOCK *const x,
+ BLOCK_SIZE bsize, int part_ctx,
+ int64_t best_rd, int64_t horz_rd[2],
+ int64_t vert_rd[2], int64_t split_rd[4],
int *const partition_horz4_allowed,
- int *const partition_vert4_allowed) {
+ int *const partition_vert4_allowed,
+ unsigned int pb_source_variance, int mi_row,
+ int mi_col) {
if (best_rd >= 1000000000) return;
const NN_CONFIG *nn_config = NULL;
switch (bsize) {
@@ -2903,7 +2973,7 @@ static void ml_prune_4_partition(const AV1_COMP *const cpi,
float features[FEATURES];
int feature_index = 0;
features[feature_index++] = (float)part_ctx;
- features[feature_index++] = (float)get_unsigned_bits(x->source_variance);
+ features[feature_index++] = (float)get_unsigned_bits(pb_source_variance);
const int rdcost = (int)AOMMIN(INT_MAX, best_rd);
int sub_block_rdcost[8] = { 0 };
@@ -2937,6 +3007,8 @@ static void ml_prune_4_partition(const AV1_COMP *const cpi,
{
BLOCK_SIZE horz_4_bs = get_partition_subsize(bsize, PARTITION_HORZ_4);
BLOCK_SIZE vert_4_bs = get_partition_subsize(bsize, PARTITION_VERT_4);
+ av1_setup_src_planes(x, cpi->source, mi_row, mi_col,
+ av1_num_planes(&cpi->common));
const int src_stride = x->plane[0].src.stride;
const uint8_t *src = x->plane[0].src.buf;
const MACROBLOCKD *const xd = &x->e_mbd;
@@ -2990,7 +3062,7 @@ static void ml_prune_4_partition(const AV1_COMP *const cpi,
}
}
- const float denom = (float)(x->source_variance + 1);
+ const float denom = (float)(pb_source_variance + 1);
const float low_b = 0.1f;
const float high_b = 10.0f;
for (int i = 0; i < 4; ++i) {
@@ -3022,9 +3094,9 @@ static void ml_prune_4_partition(const AV1_COMP *const cpi,
// Make decisions based on the model scores.
int thresh = max_score;
switch (bsize) {
- case BLOCK_16X16: thresh -= 400; break;
- case BLOCK_32X32: thresh -= 400; break;
- case BLOCK_64X64: thresh -= 100; break;
+ case BLOCK_16X16: thresh -= 500; break;
+ case BLOCK_32X32: thresh -= 500; break;
+ case BLOCK_64X64: thresh -= 200; break;
default: break;
}
*partition_horz4_allowed = 0;
@@ -3039,10 +3111,73 @@ static void ml_prune_4_partition(const AV1_COMP *const cpi,
#undef FEATURES
#undef LABELS
+#define FEATURES 4
+// ML-based partition search breakout.
+static int ml_predict_breakout(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
+ const MACROBLOCK *const x,
+ const RD_STATS *const rd_stats,
+ unsigned int pb_source_variance) {
+ const NN_CONFIG *nn_config = NULL;
+ int thresh = 0;
+ switch (bsize) {
+ case BLOCK_8X8:
+ nn_config = &av1_partition_breakout_nnconfig_8;
+ thresh = cpi->sf.ml_partition_search_breakout_thresh[0];
+ break;
+ case BLOCK_16X16:
+ nn_config = &av1_partition_breakout_nnconfig_16;
+ thresh = cpi->sf.ml_partition_search_breakout_thresh[1];
+ break;
+ case BLOCK_32X32:
+ nn_config = &av1_partition_breakout_nnconfig_32;
+ thresh = cpi->sf.ml_partition_search_breakout_thresh[2];
+ break;
+ case BLOCK_64X64:
+ nn_config = &av1_partition_breakout_nnconfig_64;
+ thresh = cpi->sf.ml_partition_search_breakout_thresh[3];
+ break;
+ case BLOCK_128X128:
+ nn_config = &av1_partition_breakout_nnconfig_128;
+ thresh = cpi->sf.ml_partition_search_breakout_thresh[4];
+ break;
+ default: assert(0 && "Unexpected bsize.");
+ }
+ if (!nn_config || thresh < 0) return 0;
+
+ // Generate feature values.
+ float features[FEATURES];
+ int feature_index = 0;
+ aom_clear_system_state();
+
+ const int num_pels_log2 = num_pels_log2_lookup[bsize];
+ float rate_f = (float)AOMMIN(rd_stats->rate, INT_MAX);
+ rate_f = ((float)x->rdmult / 128.0f / 512.0f / (float)(1 << num_pels_log2)) *
+ rate_f;
+ features[feature_index++] = rate_f;
+
+ const float dist_f =
+ (float)(AOMMIN(rd_stats->dist, INT_MAX) >> num_pels_log2);
+ features[feature_index++] = dist_f;
+
+ features[feature_index++] = (float)pb_source_variance;
+
+ const int dc_q = (int)x->plane[0].dequant_QTX[0];
+ features[feature_index++] = (float)(dc_q * dc_q) / 256.0f;
+ assert(feature_index == FEATURES);
+
+ // Calculate score using the NN model.
+ float score = 0.0f;
+ av1_nn_predict(features, nn_config, &score);
+
+ // Make decision.
+ return (int)(score * 100) >= thresh;
+}
+#undef FEATURES
+
// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
// unlikely to be selected depending on previous rate-distortion optimization
// results, for encoding speed-up.
-static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
+static void rd_pick_partition(AV1_COMP *const cpi, ThreadData *td,
TileDataEnc *tile_data, TOKENEXTRA **tp,
int mi_row, int mi_col, BLOCK_SIZE bsize,
RD_STATS *rd_cost, int64_t best_rd,
@@ -3068,6 +3203,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
pl >= 0 ? x->partition_cost[pl] : x->partition_cost[0];
int do_rectangular_split = 1;
+ int64_t cur_none_rd = 0;
int64_t split_rd[4] = { 0, 0, 0, 0 };
int64_t horz_rd[2] = { 0, 0 };
int64_t vert_rd[2] = { 0, 0 };
@@ -3077,6 +3213,12 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
int vert_ctx_is_ready = 0;
BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT);
+ if (best_rd < 0) {
+ pc_tree->none.rdcost = INT64_MAX;
+ pc_tree->none.skip = 0;
+ av1_invalid_rd_stats(rd_cost);
+ return;
+ }
if (bsize == cm->seq_params.sb_size) x->must_find_valid_partition = 0;
// Override skipping rectangular partition operations for edge blocks
@@ -3129,9 +3271,11 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
#ifndef NDEBUG
// Nothing should rely on the default value of this array (which is just
- // leftover from encoding the previous block. Setting it to magic number
+ // leftover from encoding the previous block. Setting it to fixed pattern
// when debugging.
- memset(x->blk_skip, 234, sizeof(x->blk_skip));
+ // bit 0, 1, 2 are blk_skip of each plane
+ // bit 4, 5, 6 are initialization checking of each plane
+ memset(x->blk_skip, 0x77, sizeof(x->blk_skip));
#endif // NDEBUG
assert(mi_size_wide[bsize] == mi_size_high[bsize]);
@@ -3143,7 +3287,7 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
if (bsize == BLOCK_16X16 && cpi->vaq_refresh)
- x->mb_energy = av1_block_energy(cpi, x, bsize);
+ x->mb_energy = av1_log_block_var(cpi, x, bsize);
if (cpi->sf.cb_partition_search && bsize == BLOCK_16X16) {
const int cb_partition_search_ctrl =
@@ -3285,22 +3429,56 @@ static void rd_pick_partition(const AV1_COMP *const cpi, ThreadData *td,
}
#endif
+ // Ref frames picked in the [i_th] quarter subblock during square partition
+ // RD search. It may be used to prune ref frame selection of rect partitions.
+ int ref_frames_used[4] = {
+ 0,
+ };
+
BEGIN_PARTITION_SEARCH:
if (x->must_find_valid_partition) {
partition_none_allowed = has_rows && has_cols;
partition_horz_allowed = has_cols && yss <= xss && bsize_at_least_8x8;
partition_vert_allowed = has_rows && xss <= yss && bsize_at_least_8x8;
}
+
+ // Partition block source pixel variance.
+ unsigned int pb_source_variance = UINT_MAX;
+
+#if CONFIG_DIST_8X8
+ if (x->using_dist_8x8) {
+ if (block_size_high[bsize] <= 8) partition_horz_allowed = 0;
+ if (block_size_wide[bsize] <= 8) partition_vert_allowed = 0;
+ if (block_size_high[bsize] <= 8 || block_size_wide[bsize] <= 8)
+ do_square_split = 0;
+ }
+#endif
+
// PARTITION_NONE
if (partition_none_allowed) {
+ int pt_cost = 0;
+ if (bsize_at_least_8x8) {
+ pt_cost = partition_cost[PARTITION_NONE] < INT_MAX
+ ? partition_cost[PARTITION_NONE]
+ : 0;
+ }
+ int64_t partition_rd_cost = RDCOST(x->rdmult, pt_cost, 0);
+ int64_t best_remain_rdcost = (best_rdc.rdcost == INT64_MAX)
+ ? INT64_MAX
+ : (best_rdc.rdcost - partition_rd_cost);
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
- PARTITION_NONE, bsize, ctx_none, best_rdc.rdcost);
+ PARTITION_NONE, bsize, ctx_none, best_remain_rdcost);
+ pb_source_variance = x->source_variance;
if (none_rd) *none_rd = this_rdc.rdcost;
+ cur_none_rd = this_rdc.rdcost;
if (this_rdc.rate != INT_MAX) {
+ if (cpi->sf.prune_ref_frame_for_rect_partitions) {
+ const int ref_type = av1_ref_frame_type(ctx_none->mic.ref_frame);
+ for (int i = 0; i < 4; ++i) {
+ ref_frames_used[i] |= (1 << ref_type);
+ }
+ }
if (bsize_at_least_8x8) {
- const int pt_cost = partition_cost[PARTITION_NONE] < INT_MAX
- ? partition_cost[PARTITION_NONE]
- : 0;
this_rdc.rate += pt_cost;
this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist);
}
@@ -3318,16 +3496,29 @@ BEGIN_PARTITION_SEARCH:
best_rdc = this_rdc;
if (bsize_at_least_8x8) pc_tree->partitioning = PARTITION_NONE;
- // If all y, u, v transform blocks in this partition are skippable, and
- // the dist & rate are within the thresholds, the partition search is
- // terminated for current branch of the partition search tree.
- // The dist & rate thresholds are set to 0 at speed 0 to disable the
- // early termination at that speed.
- if (!x->e_mbd.lossless[xd->mi[0]->segment_id] &&
- (ctx_none->skippable && best_rdc.dist < dist_breakout_thr &&
- best_rdc.rate < rate_breakout_thr)) {
- do_square_split = 0;
- do_rectangular_split = 0;
+ if ((do_square_split || do_rectangular_split) &&
+ !x->e_mbd.lossless[xd->mi[0]->segment_id] && ctx_none->skippable) {
+ const int use_ml_based_breakout =
+ bsize <= cpi->sf.use_square_partition_only_threshold &&
+ bsize > BLOCK_4X4 && xd->bd == 8;
+ if (use_ml_based_breakout) {
+ if (ml_predict_breakout(cpi, bsize, x, &this_rdc,
+ pb_source_variance)) {
+ do_square_split = 0;
+ do_rectangular_split = 0;
+ }
+ }
+
+ // If all y, u, v transform blocks in this partition are skippable,
+ // and the dist & rate are within the thresholds, the partition
+ // search is terminated for current branch of the partition search
+ // tree. The dist & rate thresholds are set to 0 at speed 0 to
+ // disable the early termination at that speed.
+ if (best_rdc.dist < dist_breakout_thr &&
+ best_rdc.rate < rate_breakout_thr) {
+ do_square_split = 0;
+ do_rectangular_split = 0;
+ }
}
#if CONFIG_FP_MB_STATS
@@ -3384,24 +3575,14 @@ BEGIN_PARTITION_SEARCH:
// store estimated motion vector
if (cpi->sf.adaptive_motion_search) store_pred_mv(x, ctx_none);
-#if CONFIG_DIST_8X8
- uint8_t *src_plane_8x8[MAX_MB_PLANE], *dst_plane_8x8[MAX_MB_PLANE];
-
- if (x->using_dist_8x8 && bsize == BLOCK_8X8) {
- for (int i = 0; i < num_planes; i++) {
- src_plane_8x8[i] = x->plane[i].src.buf;
- dst_plane_8x8[i] = xd->plane[i].dst.buf;
- }
- }
-#endif // CONFIG_DIST_8X8
-
// PARTITION_SPLIT
if (do_square_split) {
av1_init_rd_stats(&sum_rdc);
- int reached_last_index = 0;
subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
- int idx;
+ sum_rdc.rate = partition_cost[PARTITION_SPLIT];
+ sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
+ int idx;
for (idx = 0; idx < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++idx) {
const int x_idx = (idx & 1) * mi_step;
const int y_idx = (idx >> 1) * mi_step;
@@ -3413,8 +3594,13 @@ BEGIN_PARTITION_SEARCH:
pc_tree->split[idx]->index = idx;
int64_t *p_split_rd = &split_rd[idx];
+ int64_t best_remain_rdcost = best_rdc.rdcost == INT64_MAX
+ ? INT64_MAX
+ : (best_rdc.rdcost - sum_rdc.rdcost);
+ if (cpi->sf.prune_ref_frame_for_rect_partitions)
+ pc_tree->split[idx]->none.rate = INT_MAX;
rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx,
- subsize, &this_rdc, best_rdc.rdcost - sum_rdc.rdcost,
+ subsize, &this_rdc, best_remain_rdcost,
pc_tree->split[idx], p_split_rd);
if (this_rdc.rate == INT_MAX) {
@@ -3424,11 +3610,16 @@ BEGIN_PARTITION_SEARCH:
sum_rdc.rate += this_rdc.rate;
sum_rdc.dist += this_rdc.dist;
sum_rdc.rdcost += this_rdc.rdcost;
-
+ if (cpi->sf.prune_ref_frame_for_rect_partitions &&
+ pc_tree->split[idx]->none.rate != INT_MAX) {
+ const int ref_type =
+ av1_ref_frame_type(pc_tree->split[idx]->none.mic.ref_frame);
+ ref_frames_used[idx] |= (1 << ref_type);
+ }
if (idx <= 1 && (bsize <= BLOCK_8X8 ||
pc_tree->split[idx]->partitioning == PARTITION_NONE)) {
- MB_MODE_INFO *const mbmi = &(pc_tree->split[idx]->none.mic);
- PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
+ const MB_MODE_INFO *const mbmi = &pc_tree->split[idx]->none.mic;
+ const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
// Neither palette mode nor cfl predicted
if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) {
if (mbmi->uv_mode != UV_CFL_PRED) split_ctx_is_ready[idx] = 1;
@@ -3436,60 +3627,83 @@ BEGIN_PARTITION_SEARCH:
}
}
}
- reached_last_index = (idx == 4);
-
-#if CONFIG_DIST_8X8
- if (x->using_dist_8x8 && reached_last_index &&
- sum_rdc.rdcost != INT64_MAX && bsize == BLOCK_8X8) {
- int64_t dist_8x8;
- dist_8x8 = dist_8x8_yuv(cpi, x, src_plane_8x8, dst_plane_8x8);
-#ifdef DEBUG_DIST_8X8
- // TODO(anyone): Fix dist-8x8 assert failure here when CFL is enabled
- if (x->tune_metric == AOM_TUNE_PSNR && xd->bd == 8 && 0 /*!CONFIG_CFL*/)
- assert(sum_rdc.dist == dist_8x8);
-#endif // DEBUG_DIST_8X8
- sum_rdc.dist = dist_8x8;
- sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
- }
-#endif // CONFIG_DIST_8X8
+ const int reached_last_index = (idx == 4);
if (reached_last_index && sum_rdc.rdcost < best_rdc.rdcost) {
- sum_rdc.rate += partition_cost[PARTITION_SPLIT];
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
if (sum_rdc.rdcost < best_rdc.rdcost) {
best_rdc = sum_rdc;
pc_tree->partitioning = PARTITION_SPLIT;
}
- } else if (cpi->sf.less_rectangular_check) {
+ } else if (cpi->sf.less_rectangular_check_level > 0) {
// skip rectangular partition test when larger block size
// gives better rd cost
- do_rectangular_split &= !partition_none_allowed;
+ if (cpi->sf.less_rectangular_check_level == 2 || idx <= 2)
+ do_rectangular_split &= !partition_none_allowed;
}
restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
} // if (do_split)
+ pc_tree->horizontal[0].skip_ref_frame_mask = 0;
+ pc_tree->horizontal[1].skip_ref_frame_mask = 0;
+ pc_tree->vertical[0].skip_ref_frame_mask = 0;
+ pc_tree->vertical[1].skip_ref_frame_mask = 0;
+ if (cpi->sf.prune_ref_frame_for_rect_partitions) {
+ int used_frames;
+ used_frames = ref_frames_used[0] | ref_frames_used[1];
+ if (used_frames) pc_tree->horizontal[0].skip_ref_frame_mask = ~used_frames;
+ used_frames = ref_frames_used[2] | ref_frames_used[3];
+ if (used_frames) pc_tree->horizontal[1].skip_ref_frame_mask = ~used_frames;
+ used_frames = ref_frames_used[0] | ref_frames_used[2];
+ if (used_frames) pc_tree->vertical[0].skip_ref_frame_mask = ~used_frames;
+ used_frames = ref_frames_used[1] | ref_frames_used[3];
+ if (used_frames) pc_tree->vertical[1].skip_ref_frame_mask = ~used_frames;
+ }
+
+ int prune_horz = 0;
+ int prune_vert = 0;
+ if (cpi->sf.ml_prune_rect_partition && !frame_is_intra_only(cm) &&
+ (partition_horz_allowed || partition_vert_allowed)) {
+ av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes);
+ ml_prune_rect_partition(cpi, x, bsize, best_rdc.rdcost, cur_none_rd,
+ split_rd, &prune_horz, &prune_vert);
+ }
+
// PARTITION_HORZ
- if (partition_horz_allowed &&
+ if (partition_horz_allowed && !prune_horz &&
(do_rectangular_split || active_h_edge(cpi, mi_row, mi_step))) {
av1_init_rd_stats(&sum_rdc);
subsize = get_partition_subsize(bsize, PARTITION_HORZ);
if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
- partition_none_allowed)
+ partition_none_allowed) {
pc_tree->horizontal[0].pred_interp_filter =
av1_extract_interp_filter(ctx_none->mic.interp_filters, 0);
-
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc,
+ }
+ int64_t best_remain_rdcost = best_rdc.rdcost == INT64_MAX
+ ? INT64_MAX
+ : (best_rdc.rdcost - sum_rdc.rdcost);
+ sum_rdc.rate = partition_cost[PARTITION_HORZ];
+ sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
PARTITION_HORZ, subsize, &pc_tree->horizontal[0],
- best_rdc.rdcost);
- horz_rd[0] = sum_rdc.rdcost;
+ best_remain_rdcost);
+
+ if (this_rdc.rate == INT_MAX) {
+ sum_rdc.rdcost = INT64_MAX;
+ } else {
+ sum_rdc.rate += this_rdc.rate;
+ sum_rdc.dist += this_rdc.dist;
+ sum_rdc.rdcost += this_rdc.rdcost;
+ }
+ horz_rd[0] = this_rdc.rdcost;
if (sum_rdc.rdcost < best_rdc.rdcost && has_rows) {
- PICK_MODE_CONTEXT *ctx_h = &pc_tree->horizontal[0];
- MB_MODE_INFO *const mbmi = &(pc_tree->horizontal[0].mic);
- PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
+ const PICK_MODE_CONTEXT *const ctx_h = &pc_tree->horizontal[0];
+ const MB_MODE_INFO *const mbmi = &pc_tree->horizontal[0].mic;
+ const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
// Neither palette mode nor cfl predicted
if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) {
if (mbmi->uv_mode != UV_CFL_PRED) horz_ctx_is_ready = 1;
@@ -3501,24 +3715,15 @@ BEGIN_PARTITION_SEARCH:
if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_h);
if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
- partition_none_allowed)
+ partition_none_allowed) {
pc_tree->horizontal[1].pred_interp_filter =
av1_extract_interp_filter(ctx_h->mic.interp_filters, 0);
-
+ }
rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc,
PARTITION_HORZ, subsize, &pc_tree->horizontal[1],
best_rdc.rdcost - sum_rdc.rdcost);
horz_rd[1] = this_rdc.rdcost;
-#if CONFIG_DIST_8X8
- if (x->using_dist_8x8 && this_rdc.rate != INT_MAX && bsize == BLOCK_8X8) {
- update_state(cpi, tile_data, td, &pc_tree->horizontal[1],
- mi_row + mi_step, mi_col, subsize, DRY_RUN_NORMAL);
- encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL,
- mi_row + mi_step, mi_col, subsize, NULL);
- }
-#endif // CONFIG_DIST_8X8
-
if (this_rdc.rate == INT_MAX) {
sum_rdc.rdcost = INT64_MAX;
} else {
@@ -3526,24 +3731,9 @@ BEGIN_PARTITION_SEARCH:
sum_rdc.dist += this_rdc.dist;
sum_rdc.rdcost += this_rdc.rdcost;
}
-#if CONFIG_DIST_8X8
- if (x->using_dist_8x8 && sum_rdc.rdcost != INT64_MAX &&
- bsize == BLOCK_8X8) {
- int64_t dist_8x8;
- dist_8x8 = dist_8x8_yuv(cpi, x, src_plane_8x8, dst_plane_8x8);
-#ifdef DEBUG_DIST_8X8
- // TODO(anyone): Fix dist-8x8 assert failure here when CFL is enabled
- if (x->tune_metric == AOM_TUNE_PSNR && xd->bd == 8 && 0 /*!CONFIG_CFL*/)
- assert(sum_rdc.dist == dist_8x8);
-#endif // DEBUG_DIST_8X8
- sum_rdc.dist = dist_8x8;
- sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
- }
-#endif // CONFIG_DIST_8X8
}
if (sum_rdc.rdcost < best_rdc.rdcost) {
- sum_rdc.rate += partition_cost[PARTITION_HORZ];
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
if (sum_rdc.rdcost < best_rdc.rdcost) {
best_rdc = sum_rdc;
@@ -3555,7 +3745,7 @@ BEGIN_PARTITION_SEARCH:
}
// PARTITION_VERT
- if (partition_vert_allowed &&
+ if (partition_vert_allowed && !prune_vert &&
(do_rectangular_split || active_v_edge(cpi, mi_col, mi_step))) {
av1_init_rd_stats(&sum_rdc);
subsize = get_partition_subsize(bsize, PARTITION_VERT);
@@ -3563,18 +3753,31 @@ BEGIN_PARTITION_SEARCH:
if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
- partition_none_allowed)
+ partition_none_allowed) {
pc_tree->vertical[0].pred_interp_filter =
av1_extract_interp_filter(ctx_none->mic.interp_filters, 0);
-
- rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc,
+ }
+ sum_rdc.rate = partition_cost[PARTITION_VERT];
+ sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
+ int64_t best_remain_rdcost = best_rdc.rdcost == INT64_MAX
+ ? INT64_MAX
+ : (best_rdc.rdcost - sum_rdc.rdcost);
+ rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc,
PARTITION_VERT, subsize, &pc_tree->vertical[0],
- best_rdc.rdcost);
- vert_rd[0] = sum_rdc.rdcost;
+ best_remain_rdcost);
+
+ if (this_rdc.rate == INT_MAX) {
+ sum_rdc.rdcost = INT64_MAX;
+ } else {
+ sum_rdc.rate += this_rdc.rate;
+ sum_rdc.dist += this_rdc.dist;
+ sum_rdc.rdcost += this_rdc.rdcost;
+ }
+ vert_rd[0] = this_rdc.rdcost;
const int64_t vert_max_rdcost = best_rdc.rdcost;
if (sum_rdc.rdcost < vert_max_rdcost && has_cols) {
- MB_MODE_INFO *const mbmi = &(pc_tree->vertical[0].mic);
- PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
+ const MB_MODE_INFO *const mbmi = &pc_tree->vertical[0].mic;
+ const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
// Neither palette mode nor cfl predicted
if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) {
if (mbmi->uv_mode != UV_CFL_PRED) vert_ctx_is_ready = 1;
@@ -3587,24 +3790,15 @@ BEGIN_PARTITION_SEARCH:
if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
- partition_none_allowed)
+ partition_none_allowed) {
pc_tree->vertical[1].pred_interp_filter =
av1_extract_interp_filter(ctx_none->mic.interp_filters, 0);
-
+ }
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc,
PARTITION_VERT, subsize, &pc_tree->vertical[1],
best_rdc.rdcost - sum_rdc.rdcost);
vert_rd[1] = this_rdc.rdcost;
-#if CONFIG_DIST_8X8
- if (x->using_dist_8x8 && this_rdc.rate != INT_MAX && bsize == BLOCK_8X8) {
- update_state(cpi, tile_data, td, &pc_tree->vertical[1], mi_row,
- mi_col + mi_step, subsize, DRY_RUN_NORMAL);
- encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row,
- mi_col + mi_step, subsize, NULL);
- }
-#endif // CONFIG_DIST_8X8
-
if (this_rdc.rate == INT_MAX) {
sum_rdc.rdcost = INT64_MAX;
} else {
@@ -3612,25 +3806,9 @@ BEGIN_PARTITION_SEARCH:
sum_rdc.dist += this_rdc.dist;
sum_rdc.rdcost += this_rdc.rdcost;
}
-#if CONFIG_DIST_8X8
- if (x->using_dist_8x8 && sum_rdc.rdcost != INT64_MAX &&
- bsize == BLOCK_8X8) {
- int64_t dist_8x8;
- dist_8x8 = dist_8x8_yuv(cpi, x, src_plane_8x8, dst_plane_8x8);
-#ifdef DEBUG_DIST_8X8
- // TODO(anyone): Fix dist-8x8 assert failure here when CFL is enabled
- if (x->tune_metric == AOM_TUNE_PSNR && xd->bd == 8 &&
- 0 /* !CONFIG_CFL */)
- assert(sum_rdc.dist == dist_8x8);
-#endif // DEBUG_DIST_8X8
- sum_rdc.dist = dist_8x8;
- sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
- }
-#endif // CONFIG_DIST_8X8
}
if (sum_rdc.rdcost < best_rdc.rdcost) {
- sum_rdc.rate += partition_cost[PARTITION_VERT];
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
if (sum_rdc.rdcost < best_rdc.rdcost) {
best_rdc = sum_rdc;
@@ -3641,6 +3819,17 @@ BEGIN_PARTITION_SEARCH:
restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
}
+ if (pb_source_variance == UINT_MAX) {
+ av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes);
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ pb_source_variance = av1_high_get_sby_perpixel_variance(
+ cpi, &x->plane[0].src, bsize, xd->bd);
+ } else {
+ pb_source_variance =
+ av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
+ }
+ }
+
const int ext_partition_allowed =
do_rectangular_split && bsize > BLOCK_8X8 && partition_none_allowed;
@@ -3649,15 +3838,26 @@ BEGIN_PARTITION_SEARCH:
int horzab_partition_allowed = ext_partition_allowed;
int vertab_partition_allowed = ext_partition_allowed;
+#if CONFIG_DIST_8X8
+ if (x->using_dist_8x8) {
+ if (block_size_high[bsize] <= 8 || block_size_wide[bsize] <= 8) {
+ horzab_partition_allowed = 0;
+ vertab_partition_allowed = 0;
+ }
+ }
+#endif
+
if (cpi->sf.prune_ext_partition_types_search_level) {
if (cpi->sf.prune_ext_partition_types_search_level == 1) {
+ // TODO(debargha,huisu@google.com): may need to tune the threshold for
+ // pb_source_variance.
horzab_partition_allowed &= (pc_tree->partitioning == PARTITION_HORZ ||
(pc_tree->partitioning == PARTITION_NONE &&
- x->source_variance < 32) ||
+ pb_source_variance < 32) ||
pc_tree->partitioning == PARTITION_SPLIT);
vertab_partition_allowed &= (pc_tree->partitioning == PARTITION_VERT ||
(pc_tree->partitioning == PARTITION_NONE &&
- x->source_variance < 32) ||
+ pb_source_variance < 32) ||
pc_tree->partitioning == PARTITION_SPLIT);
} else {
horzab_partition_allowed &= (pc_tree->partitioning == PARTITION_HORZ ||
@@ -3712,6 +3912,9 @@ BEGIN_PARTITION_SEARCH:
if (cpi->sf.ml_prune_ab_partition && ext_partition_allowed &&
partition_horz_allowed && partition_vert_allowed) {
+ // TODO(huisu@google.com): x->source_variance may not be the current block's
+ // variance. The correct one to use is pb_source_variance.
+ // Need to re-train the model to fix it.
ml_prune_ab_partition(bsize, pc_tree->partitioning,
get_unsigned_bits(x->source_variance),
best_rdc.rdcost, horz_rd, vert_rd, split_rd,
@@ -3736,6 +3939,21 @@ BEGIN_PARTITION_SEARCH:
pc_tree->horizontala[1].rd_mode_is_ready = 1;
}
}
+ pc_tree->horizontala[0].skip_ref_frame_mask = 0;
+ pc_tree->horizontala[1].skip_ref_frame_mask = 0;
+ pc_tree->horizontala[2].skip_ref_frame_mask = 0;
+ if (cpi->sf.prune_ref_frame_for_rect_partitions) {
+ int used_frames;
+ used_frames = ref_frames_used[0];
+ if (used_frames)
+ pc_tree->horizontala[0].skip_ref_frame_mask = ~used_frames;
+ used_frames = ref_frames_used[1];
+ if (used_frames)
+ pc_tree->horizontala[1].skip_ref_frame_mask = ~used_frames;
+ used_frames = ref_frames_used[2] | ref_frames_used[3];
+ if (used_frames)
+ pc_tree->horizontala[2].skip_ref_frame_mask = ~used_frames;
+ }
rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
pc_tree->horizontala, ctx_none, mi_row, mi_col, bsize,
PARTITION_HORZ_A, mi_row, mi_col, bsize2, mi_row,
@@ -3754,6 +3972,21 @@ BEGIN_PARTITION_SEARCH:
pc_tree->horizontalb[0].mic.partition = PARTITION_HORZ_B;
pc_tree->horizontalb[0].rd_mode_is_ready = 1;
}
+ pc_tree->horizontalb[0].skip_ref_frame_mask = 0;
+ pc_tree->horizontalb[1].skip_ref_frame_mask = 0;
+ pc_tree->horizontalb[2].skip_ref_frame_mask = 0;
+ if (cpi->sf.prune_ref_frame_for_rect_partitions) {
+ int used_frames;
+ used_frames = ref_frames_used[0] | ref_frames_used[1];
+ if (used_frames)
+ pc_tree->horizontalb[0].skip_ref_frame_mask = ~used_frames;
+ used_frames = ref_frames_used[2];
+ if (used_frames)
+ pc_tree->horizontalb[1].skip_ref_frame_mask = ~used_frames;
+ used_frames = ref_frames_used[3];
+ if (used_frames)
+ pc_tree->horizontalb[2].skip_ref_frame_mask = ~used_frames;
+ }
rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
pc_tree->horizontalb, ctx_none, mi_row, mi_col, bsize,
PARTITION_HORZ_B, mi_row, mi_col, subsize,
@@ -3773,6 +4006,18 @@ BEGIN_PARTITION_SEARCH:
pc_tree->verticala[0].mic.partition = PARTITION_VERT_A;
pc_tree->verticala[0].rd_mode_is_ready = 1;
}
+ pc_tree->verticala[0].skip_ref_frame_mask = 0;
+ pc_tree->verticala[1].skip_ref_frame_mask = 0;
+ pc_tree->verticala[2].skip_ref_frame_mask = 0;
+ if (cpi->sf.prune_ref_frame_for_rect_partitions) {
+ int used_frames;
+ used_frames = ref_frames_used[0];
+ if (used_frames) pc_tree->verticala[0].skip_ref_frame_mask = ~used_frames;
+ used_frames = ref_frames_used[2];
+ if (used_frames) pc_tree->verticala[1].skip_ref_frame_mask = ~used_frames;
+ used_frames = ref_frames_used[1] | ref_frames_used[3];
+ if (used_frames) pc_tree->verticala[2].skip_ref_frame_mask = ~used_frames;
+ }
rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
pc_tree->verticala, ctx_none, mi_row, mi_col, bsize,
PARTITION_VERT_A, mi_row, mi_col, bsize2,
@@ -3791,6 +4036,18 @@ BEGIN_PARTITION_SEARCH:
pc_tree->verticalb[0].mic.partition = PARTITION_VERT_B;
pc_tree->verticalb[0].rd_mode_is_ready = 1;
}
+ pc_tree->verticalb[0].skip_ref_frame_mask = 0;
+ pc_tree->verticalb[1].skip_ref_frame_mask = 0;
+ pc_tree->verticalb[2].skip_ref_frame_mask = 0;
+ if (cpi->sf.prune_ref_frame_for_rect_partitions) {
+ int used_frames;
+ used_frames = ref_frames_used[0] | ref_frames_used[2];
+ if (used_frames) pc_tree->verticalb[0].skip_ref_frame_mask = ~used_frames;
+ used_frames = ref_frames_used[1];
+ if (used_frames) pc_tree->verticalb[1].skip_ref_frame_mask = ~used_frames;
+ used_frames = ref_frames_used[3];
+ if (used_frames) pc_tree->verticalb[2].skip_ref_frame_mask = ~used_frames;
+ }
rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
pc_tree->verticalb, ctx_none, mi_row, mi_col, bsize,
PARTITION_VERT_B, mi_row, mi_col, subsize, mi_row,
@@ -3823,9 +4080,19 @@ BEGIN_PARTITION_SEARCH:
partition_horz_allowed && partition_vert_allowed) {
ml_prune_4_partition(cpi, x, bsize, pc_tree->partitioning, best_rdc.rdcost,
horz_rd, vert_rd, split_rd, &partition_horz4_allowed,
- &partition_vert4_allowed);
+ &partition_vert4_allowed, pb_source_variance, mi_row,
+ mi_col);
}
+#if CONFIG_DIST_8X8
+ if (x->using_dist_8x8) {
+ if (block_size_high[bsize] <= 16 || block_size_wide[bsize] <= 16) {
+ partition_horz4_allowed = 0;
+ partition_vert4_allowed = 0;
+ }
+ }
+#endif
+
// PARTITION_HORZ_4
if (partition_horz4_allowed && has_rows &&
(do_rectangular_split || active_h_edge(cpi, mi_row, mi_step))) {
@@ -3834,25 +4101,33 @@ BEGIN_PARTITION_SEARCH:
PICK_MODE_CONTEXT *ctx_prev = ctx_none;
subsize = get_partition_subsize(bsize, PARTITION_HORZ_4);
+ sum_rdc.rate = partition_cost[PARTITION_HORZ_4];
+ sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
for (int i = 0; i < 4; ++i) {
- int this_mi_row = mi_row + i * quarter_step;
+ const int this_mi_row = mi_row + i * quarter_step;
if (i > 0 && this_mi_row >= cm->mi_rows) break;
PICK_MODE_CONTEXT *ctx_this = &pc_tree->horizontal4[i];
ctx_this->rd_mode_is_ready = 0;
- if (!rd_try_subblock(cpi, td, tile_data, tp, (i == 0), (i == 3),
- this_mi_row, mi_col, subsize, &best_rdc, &sum_rdc,
- &this_rdc, PARTITION_HORZ_4, ctx_prev, ctx_this))
+ ctx_this->skip_ref_frame_mask = 0;
+ if (cpi->sf.prune_ref_frame_for_rect_partitions) {
+ const int used_frames = i <= 1
+ ? (ref_frames_used[0] | ref_frames_used[1])
+ : (ref_frames_used[2] | ref_frames_used[3]);
+ if (used_frames) ctx_this->skip_ref_frame_mask = ~used_frames;
+ }
+ if (!rd_try_subblock(cpi, td, tile_data, tp, (i == 3), this_mi_row,
+ mi_col, subsize, &best_rdc, &sum_rdc, &this_rdc,
+ PARTITION_HORZ_4, ctx_prev, ctx_this))
break;
ctx_prev = ctx_this;
}
if (sum_rdc.rdcost < best_rdc.rdcost) {
- sum_rdc.rate += partition_cost[PARTITION_HORZ_4];
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
if (sum_rdc.rdcost < best_rdc.rdcost) {
best_rdc = sum_rdc;
@@ -3870,16 +4145,25 @@ BEGIN_PARTITION_SEARCH:
PICK_MODE_CONTEXT *ctx_prev = ctx_none;
subsize = get_partition_subsize(bsize, PARTITION_VERT_4);
+ sum_rdc.rate = partition_cost[PARTITION_VERT_4];
+ sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
for (int i = 0; i < 4; ++i) {
- int this_mi_col = mi_col + i * quarter_step;
+ const int this_mi_col = mi_col + i * quarter_step;
if (i > 0 && this_mi_col >= cm->mi_cols) break;
PICK_MODE_CONTEXT *ctx_this = &pc_tree->vertical4[i];
ctx_this->rd_mode_is_ready = 0;
- if (!rd_try_subblock(cpi, td, tile_data, tp, (i == 0), (i == 3), mi_row,
+ ctx_this->skip_ref_frame_mask = 0;
+ if (cpi->sf.prune_ref_frame_for_rect_partitions) {
+ const int used_frames = i <= 1
+ ? (ref_frames_used[0] | ref_frames_used[2])
+ : (ref_frames_used[1] | ref_frames_used[3]);
+ if (used_frames) ctx_this->skip_ref_frame_mask = ~used_frames;
+ }
+ if (!rd_try_subblock(cpi, td, tile_data, tp, (i == 3), mi_row,
this_mi_col, subsize, &best_rdc, &sum_rdc, &this_rdc,
PARTITION_VERT_4, ctx_prev, ctx_this))
break;
@@ -3888,7 +4172,6 @@ BEGIN_PARTITION_SEARCH:
}
if (sum_rdc.rdcost < best_rdc.rdcost) {
- sum_rdc.rate += partition_cost[PARTITION_VERT_4];
sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
if (sum_rdc.rdcost < best_rdc.rdcost) {
best_rdc = sum_rdc;
@@ -3924,14 +4207,6 @@ BEGIN_PARTITION_SEARCH:
}
}
-#if CONFIG_DIST_8X8
- if (x->using_dist_8x8 && best_rdc.rate < INT_MAX &&
- best_rdc.dist < INT64_MAX && bsize == BLOCK_4X4 && pc_tree->index == 3) {
- encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
- pc_tree, NULL);
- }
-#endif // CONFIG_DIST_8X8
-
if (bsize == cm->seq_params.sb_size) {
assert(best_rdc.rate < INT_MAX);
assert(best_rdc.dist < INT64_MAX);
@@ -3950,6 +4225,15 @@ static void init_first_partition_pass_stats_tables(
}
}
+// clear pc_tree_stats
+static INLINE void clear_pc_tree_stats(PC_TREE *pt) {
+ if (pt == NULL) return;
+ pt->pc_tree_stats.valid = 0;
+ for (int i = 0; i < 4; ++i) {
+ clear_pc_tree_stats(pt->split[i]);
+ }
+}
+
// Minimum number of samples to trigger the
// mode_pruning_based_on_two_pass_partition_search feature.
#define FIRST_PARTITION_PASS_MIN_SAMPLES 16
@@ -3963,7 +4247,6 @@ static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td,
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
SPEED_FEATURES *const sf = &cpi->sf;
- int mi_col;
const int leaf_nodes = 256;
// Initialize the left context for the new SB row
@@ -3977,26 +4260,16 @@ static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td,
}
}
+ PC_TREE *const pc_root =
+ td->pc_root[cm->seq_params.mib_size_log2 - MIN_MIB_SIZE_LOG2];
// Code each SB in the row
- for (mi_col = tile_info->mi_col_start; mi_col < tile_info->mi_col_end;
+ for (int mi_col = tile_info->mi_col_start; mi_col < tile_info->mi_col_end;
mi_col += cm->seq_params.mib_size) {
- const struct segmentation *const seg = &cm->seg;
- int dummy_rate;
- int64_t dummy_dist;
- RD_STATS dummy_rdc;
- int i;
- int seg_skip = 0;
-
- const int idx_str = cm->mi_stride * mi_row + mi_col;
- MB_MODE_INFO **mi = cm->mi_grid_visible + idx_str;
- PC_TREE *const pc_root =
- td->pc_root[cm->seq_params.mib_size_log2 - MIN_MIB_SIZE_LOG2];
-
av1_fill_coeff_costs(&td->mb, xd->tile_ctx, num_planes);
av1_fill_mode_rates(cm, x, xd->tile_ctx);
if (sf->adaptive_pred_interp_filter) {
- for (i = 0; i < leaf_nodes; ++i) {
+ for (int i = 0; i < leaf_nodes; ++i) {
td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE;
td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE;
td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE;
@@ -4015,10 +4288,12 @@ static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td,
av1_zero(x->pred_mv);
pc_root->index = 0;
+ const struct segmentation *const seg = &cm->seg;
+ int seg_skip = 0;
if (seg->enabled) {
const uint8_t *const map =
seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
- int segment_id =
+ const int segment_id =
map ? get_segment_id(cm, map, cm->seq_params.sb_size, mi_row, mi_col)
: 0;
seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
@@ -4039,15 +4314,14 @@ static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td,
cpi, block_wavelet_energy_level);
} else {
const int block_var_level =
- av1_block_energy(cpi, x, cm->seq_params.sb_size);
+ av1_log_block_var(cpi, x, cm->seq_params.sb_size);
x->sb_energy_level = block_var_level;
offset_qindex =
av1_compute_deltaq_from_energy_level(cpi, block_var_level);
}
- int qmask = ~(cm->delta_q_res - 1);
+ const int qmask = ~(cm->delta_q_res - 1);
int current_qindex = clamp(cm->base_qindex + offset_qindex,
cm->delta_q_res, 256 - cm->delta_q_res);
-
current_qindex =
((current_qindex - cm->base_qindex + cm->delta_q_res / 2) & qmask) +
cm->base_qindex;
@@ -4058,18 +4332,16 @@ static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td,
xd->mi[0]->current_qindex = current_qindex;
av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id);
if (cpi->oxcf.deltaq_mode == DELTA_Q_LF) {
- int j, k;
- int lfmask = ~(cm->delta_lf_res - 1);
- int delta_lf_from_base = offset_qindex / 2;
- delta_lf_from_base =
- ((delta_lf_from_base + cm->delta_lf_res / 2) & lfmask);
+ const int lfmask = ~(cm->delta_lf_res - 1);
+ const int delta_lf_from_base =
+ ((offset_qindex / 2 + cm->delta_lf_res / 2) & lfmask);
// pre-set the delta lf for loop filter. Note that this value is set
// before mi is assigned for each block in current superblock
- for (j = 0; j < AOMMIN(cm->seq_params.mib_size, cm->mi_rows - mi_row);
- j++) {
- for (k = 0; k < AOMMIN(cm->seq_params.mib_size, cm->mi_cols - mi_col);
- k++) {
+ for (int j = 0;
+ j < AOMMIN(cm->seq_params.mib_size, cm->mi_rows - mi_row); j++) {
+ for (int k = 0;
+ k < AOMMIN(cm->seq_params.mib_size, cm->mi_cols - mi_col); k++) {
cm->mi[(mi_row + j) * cm->mi_stride + (mi_col + k)]
.delta_lf_from_base =
clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
@@ -4085,19 +4357,24 @@ static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td,
}
}
+ int dummy_rate;
+ int64_t dummy_dist;
+ RD_STATS dummy_rdc;
+ const int idx_str = cm->mi_stride * mi_row + mi_col;
+ MB_MODE_INFO **mi = cm->mi_grid_visible + idx_str;
x->source_variance = UINT_MAX;
if (sf->partition_search_type == FIXED_PARTITION || seg_skip) {
- BLOCK_SIZE bsize;
set_offsets(cpi, tile_info, x, mi_row, mi_col, cm->seq_params.sb_size);
- bsize = seg_skip ? cm->seq_params.sb_size : sf->always_this_block_size;
+ const BLOCK_SIZE bsize =
+ seg_skip ? cm->seq_params.sb_size : sf->always_this_block_size;
set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
cm->seq_params.sb_size, &dummy_rate, &dummy_dist, 1,
pc_root);
} else if (cpi->partition_search_skippable_frame) {
- BLOCK_SIZE bsize;
set_offsets(cpi, tile_info, x, mi_row, mi_col, cm->seq_params.sb_size);
- bsize = get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col);
+ const BLOCK_SIZE bsize =
+ get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col);
set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
cm->seq_params.sb_size, &dummy_rate, &dummy_dist, 1,
@@ -4113,9 +4390,9 @@ static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td,
reset_partition(pc_root, cm->seq_params.sb_size);
x->use_cb_search_range = 0;
init_first_partition_pass_stats_tables(x->first_partition_pass_stats);
+ // Do the first pass if we need two pass partition search
if (cpi->sf.two_pass_partition_search &&
- cpi->sf.use_square_partition_only_threshold <
- cm->seq_params.sb_size &&
+ cpi->sf.use_square_partition_only_threshold > BLOCK_4X4 &&
mi_row + mi_size_high[cm->seq_params.sb_size] < cm->mi_rows &&
mi_col + mi_size_wide[cm->seq_params.sb_size] < cm->mi_cols &&
cm->frame_type != KEY_FRAME) {
@@ -4123,6 +4400,7 @@ static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td,
// Reset the stats tables.
if (sf->mode_pruning_based_on_two_pass_partition_search)
av1_zero(x->first_partition_pass_stats);
+ clear_pc_tree_stats(pc_root);
rd_pick_sqr_partition(cpi, td, tile_data, tp, mi_row, mi_col,
cm->seq_params.sb_size, &dummy_rdc, INT64_MAX,
pc_root, NULL);
@@ -4130,7 +4408,7 @@ static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td,
x->source_variance = UINT_MAX;
if (sf->adaptive_pred_interp_filter) {
- for (i = 0; i < leaf_nodes; ++i) {
+ for (int i = 0; i < leaf_nodes; ++i) {
td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE;
td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE;
td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE;
@@ -4157,7 +4435,7 @@ static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td,
x->use_cb_search_range = 1;
if (sf->mode_pruning_based_on_two_pass_partition_search) {
- for (i = 0; i < FIRST_PARTITION_PASS_STATS_TABLES; ++i) {
+ for (int i = 0; i < FIRST_PARTITION_PASS_STATS_TABLES; ++i) {
FIRST_PARTITION_PASS_STATS *const stat =
&x->first_partition_pass_stats[i];
if (stat->sample_counts < FIRST_PARTITION_PASS_MIN_SAMPLES) {
@@ -4174,21 +4452,17 @@ static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td,
}
}
}
-
- rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
- cm->seq_params.sb_size, &dummy_rdc, INT64_MAX,
- pc_root, NULL);
- } else {
- rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
- cm->seq_params.sb_size, &dummy_rdc, INT64_MAX,
- pc_root, NULL);
}
+
+ rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
+ cm->seq_params.sb_size, &dummy_rdc, INT64_MAX, pc_root,
+ NULL);
}
#if CONFIG_COLLECT_INTER_MODE_RD_STATS
// TODO(angiebird): Let inter_mode_rd_model_estimation support multi-tile.
if (cpi->sf.inter_mode_rd_model_estimation && cm->tile_cols == 1 &&
cm->tile_rows == 1) {
- av1_inter_mode_data_fit(x->rdmult);
+ av1_inter_mode_data_fit(tile_data, x->rdmult);
}
#endif
}
@@ -4233,6 +4507,32 @@ static TX_MODE select_tx_mode(const AV1_COMP *cpi) {
return cpi->common.tx_mode;
}
+void av1_alloc_tile_data(AV1_COMP *cpi) {
+ AV1_COMMON *const cm = &cpi->common;
+ const int tile_cols = cm->tile_cols;
+ const int tile_rows = cm->tile_rows;
+ int tile_col, tile_row;
+
+ if (cpi->tile_data != NULL) aom_free(cpi->tile_data);
+ CHECK_MEM_ERROR(
+ cm, cpi->tile_data,
+ aom_memalign(32, tile_cols * tile_rows * sizeof(*cpi->tile_data)));
+ cpi->allocated_tiles = tile_cols * tile_rows;
+
+ for (tile_row = 0; tile_row < tile_rows; ++tile_row)
+ for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
+ TileDataEnc *const tile_data =
+ &cpi->tile_data[tile_row * tile_cols + tile_col];
+ int i, j;
+ for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
+ for (j = 0; j < MAX_MODES; ++j) {
+ tile_data->thresh_freq_fact[i][j] = 32;
+ tile_data->mode_map[i][j] = j;
+ }
+ }
+ }
+}
+
void av1_init_tile_data(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
const int num_planes = av1_num_planes(cm);
@@ -4240,28 +4540,9 @@ void av1_init_tile_data(AV1_COMP *cpi) {
const int tile_rows = cm->tile_rows;
int tile_col, tile_row;
TOKENEXTRA *pre_tok = cpi->tile_tok[0][0];
+ TOKENLIST *tplist = cpi->tplist[0][0];
unsigned int tile_tok = 0;
-
- if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) {
- if (cpi->tile_data != NULL) aom_free(cpi->tile_data);
- CHECK_MEM_ERROR(
- cm, cpi->tile_data,
- aom_memalign(32, tile_cols * tile_rows * sizeof(*cpi->tile_data)));
- cpi->allocated_tiles = tile_cols * tile_rows;
-
- for (tile_row = 0; tile_row < tile_rows; ++tile_row)
- for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
- TileDataEnc *const tile_data =
- &cpi->tile_data[tile_row * tile_cols + tile_col];
- int i, j;
- for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
- for (j = 0; j < MAX_MODES; ++j) {
- tile_data->thresh_freq_fact[i][j] = 32;
- tile_data->mode_map[i][j] = j;
- }
- }
- }
- }
+ int tplist_count = 0;
for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
@@ -4274,6 +4555,9 @@ void av1_init_tile_data(AV1_COMP *cpi) {
pre_tok = cpi->tile_tok[tile_row][tile_col];
tile_tok = allocated_tokens(
*tile_info, cm->seq_params.mib_size_log2 + MI_SIZE_LOG2, num_planes);
+ cpi->tplist[tile_row][tile_col] = tplist + tplist_count;
+ tplist = cpi->tplist[tile_row][tile_col];
+ tplist_count = av1_get_sb_rows_in_tile(cm, tile_data->tile_info);
tile_data->allow_update_cdf = !cm->large_scale_tile;
tile_data->allow_update_cdf =
tile_data->allow_update_cdf && !cm->disable_cdf_update;
@@ -4281,15 +4565,56 @@ void av1_init_tile_data(AV1_COMP *cpi) {
}
}
+void av1_encode_sb_row(AV1_COMP *cpi, ThreadData *td, int tile_row,
+ int tile_col, int mi_row) {
+ AV1_COMMON *const cm = &cpi->common;
+ const int num_planes = av1_num_planes(cm);
+ const int tile_cols = cm->tile_cols;
+ TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
+ const TileInfo *const tile_info = &this_tile->tile_info;
+ TOKENEXTRA *tok = NULL;
+ int sb_row_in_tile;
+ int tile_mb_cols = (tile_info->mi_col_end - tile_info->mi_col_start + 2) >> 2;
+
+ int num_mb_rows_in_sb =
+ ((1 << (cm->seq_params.mib_size_log2 + MI_SIZE_LOG2)) + 8) >> 4;
+
+ sb_row_in_tile =
+ (mi_row - tile_info->mi_row_start) >> cm->seq_params.mib_size_log2;
+
+ get_start_tok(cpi, tile_row, tile_col, mi_row, &tok,
+ cm->seq_params.mib_size_log2 + MI_SIZE_LOG2, num_planes);
+ cpi->tplist[tile_row][tile_col][sb_row_in_tile].start = tok;
+
+ encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok);
+
+ cpi->tplist[tile_row][tile_col][sb_row_in_tile].stop = tok;
+ cpi->tplist[tile_row][tile_col][sb_row_in_tile].count =
+ (unsigned int)(cpi->tplist[tile_row][tile_col][sb_row_in_tile].stop -
+ cpi->tplist[tile_row][tile_col][sb_row_in_tile].start);
+
+ assert(
+ (unsigned int)(tok -
+ cpi->tplist[tile_row][tile_col][sb_row_in_tile].start) <=
+ get_token_alloc(num_mb_rows_in_sb, tile_mb_cols,
+ cm->seq_params.mib_size_log2 + MI_SIZE_LOG2, num_planes));
+
+ (void)tile_mb_cols;
+ (void)num_mb_rows_in_sb;
+}
+
void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
int tile_col) {
AV1_COMMON *const cm = &cpi->common;
TileDataEnc *const this_tile =
&cpi->tile_data[tile_row * cm->tile_cols + tile_col];
const TileInfo *const tile_info = &this_tile->tile_info;
- TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col];
int mi_row;
+#if CONFIG_COLLECT_INTER_MODE_RD_STATS
+ av1_inter_mode_data_init(this_tile);
+#endif
+
av1_zero_above_context(cm, &td->mb.e_mbd, tile_info->mi_col_start,
tile_info->mi_col_end, tile_row);
av1_init_above_context(cm, &td->mb.e_mbd, tile_row);
@@ -4310,25 +4635,23 @@ void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
mi_row += cm->seq_params.mib_size) {
- encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok);
+ av1_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
}
-
- cpi->tok_count[tile_row][tile_col] =
- (unsigned int)(tok - cpi->tile_tok[tile_row][tile_col]);
- assert(cpi->tok_count[tile_row][tile_col] <=
- allocated_tokens(*tile_info,
- cm->seq_params.mib_size_log2 + MI_SIZE_LOG2,
- av1_num_planes(cm)));
}
static void encode_tiles(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
+ const int tile_cols = cm->tile_cols;
+ const int tile_rows = cm->tile_rows;
int tile_col, tile_row;
+ if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows)
+ av1_alloc_tile_data(cpi);
+
av1_init_tile_data(cpi);
- for (tile_row = 0; tile_row < cm->tile_rows; ++tile_row) {
- for (tile_col = 0; tile_col < cm->tile_cols; ++tile_col) {
+ for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
+ for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
av1_encode_tile(cpi, &cpi->td, tile_row, tile_col);
cpi->intrabc_used |= cpi->td.intrabc_used_this_tile;
}
@@ -4616,6 +4939,13 @@ static INLINE int skip_gm_frame(AV1_COMMON *const cm, int ref_frame) {
return 0;
}
+static void set_default_interp_skip_flags(AV1_COMP *cpi) {
+ const int num_planes = av1_num_planes(&cpi->common);
+ cpi->default_interp_skip_flags = (num_planes == 1)
+ ? DEFAULT_LUMA_INTERP_SKIP_FLAG
+ : DEFAULT_INTERP_SKIP_FLAG;
+}
+
static void encode_frame_internal(AV1_COMP *cpi) {
ThreadData *const td = &cpi->td;
MACROBLOCK *const x = &td->mb;
@@ -4683,41 +5013,41 @@ static void encode_frame_internal(AV1_COMP *cpi) {
av1_hash_table_create(&cm->cur_frame->hash_table);
av1_generate_block_2x2_hash_value(cpi->source, block_hash_values[0],
- is_block_same[0]);
+ is_block_same[0], &cpi->td.mb);
av1_generate_block_hash_value(cpi->source, 4, block_hash_values[0],
block_hash_values[1], is_block_same[0],
- is_block_same[1]);
+ is_block_same[1], &cpi->td.mb);
av1_add_to_hash_map_by_row_with_precal_data(
&cm->cur_frame->hash_table, block_hash_values[1], is_block_same[1][2],
pic_width, pic_height, 4);
av1_generate_block_hash_value(cpi->source, 8, block_hash_values[1],
block_hash_values[0], is_block_same[1],
- is_block_same[0]);
+ is_block_same[0], &cpi->td.mb);
av1_add_to_hash_map_by_row_with_precal_data(
&cm->cur_frame->hash_table, block_hash_values[0], is_block_same[0][2],
pic_width, pic_height, 8);
av1_generate_block_hash_value(cpi->source, 16, block_hash_values[0],
block_hash_values[1], is_block_same[0],
- is_block_same[1]);
+ is_block_same[1], &cpi->td.mb);
av1_add_to_hash_map_by_row_with_precal_data(
&cm->cur_frame->hash_table, block_hash_values[1], is_block_same[1][2],
pic_width, pic_height, 16);
av1_generate_block_hash_value(cpi->source, 32, block_hash_values[1],
block_hash_values[0], is_block_same[1],
- is_block_same[0]);
+ is_block_same[0], &cpi->td.mb);
av1_add_to_hash_map_by_row_with_precal_data(
&cm->cur_frame->hash_table, block_hash_values[0], is_block_same[0][2],
pic_width, pic_height, 32);
av1_generate_block_hash_value(cpi->source, 64, block_hash_values[0],
block_hash_values[1], is_block_same[0],
- is_block_same[1]);
+ is_block_same[1], &cpi->td.mb);
av1_add_to_hash_map_by_row_with_precal_data(
&cm->cur_frame->hash_table, block_hash_values[1], is_block_same[1][2],
pic_width, pic_height, 64);
av1_generate_block_hash_value(cpi->source, 128, block_hash_values[1],
block_hash_values[0], is_block_same[1],
- is_block_same[0]);
+ is_block_same[0], &cpi->td.mb);
av1_add_to_hash_map_by_row_with_precal_data(
&cm->cur_frame->hash_table, block_hash_values[0], is_block_same[0][2],
pic_width, pic_height, 128);
@@ -4769,7 +5099,7 @@ static void encode_frame_internal(AV1_COMP *cpi) {
av1_initialize_rd_consts(cpi);
av1_initialize_me_consts(cpi, x, cm->base_qindex);
init_encode_frame_mb_context(cpi);
-
+ set_default_interp_skip_flags(cpi);
if (cm->prev_frame)
cm->last_frame_seg_map = cm->prev_frame->seg_map;
else
@@ -4793,6 +5123,9 @@ static void encode_frame_internal(AV1_COMP *cpi) {
av1_zero(rdc->global_motion_used);
av1_zero(cpi->gmparams_cost);
+#if !CONFIG_GLOBAL_MOTION_SEARCH
+ cpi->global_motion_search_done = 1;
+#endif // !CONFIG_GLOBAL_MOTION_SEARCH
if (cpi->common.frame_type == INTER_FRAME && cpi->source &&
!cpi->global_motion_search_done) {
YV12_BUFFER_CONFIG *ref_buf[REF_FRAMES];
@@ -4939,27 +5272,13 @@ static void encode_frame_internal(AV1_COMP *cpi) {
}
#endif
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
- av1_inter_mode_data_init();
-#endif
-
- // If allowed, encoding tiles in parallel with one thread handling one tile.
- // TODO(geza.lore): The multi-threaded encoder is not safe with more than
- // 1 tile rows, as it uses the single above_context et al arrays from
- // cpi->common
- if (AOMMIN(cpi->oxcf.max_threads, cm->tile_cols) > 1 && cm->tile_rows == 1)
+ if (cpi->row_mt && (cpi->oxcf.max_threads > 1))
+ av1_encode_tiles_mt(cpi);
+ else if (AOMMIN(cpi->oxcf.max_threads, cm->tile_cols * cm->tile_rows) > 1)
av1_encode_tiles_mt(cpi);
else
encode_tiles(cpi);
-#if CONFIG_COLLECT_INTER_MODE_RD_STATS
-#if INTER_MODE_RD_TEST
- if (cpi->sf.inter_mode_rd_model_estimation) {
- av1_inter_mode_data_show(cm);
- }
-#endif
-#endif
-
aom_usec_timer_mark(&emr_timer);
cpi->time_encode_sb_row += aom_usec_timer_elapsed(&emr_timer);
}
@@ -5407,7 +5726,7 @@ static void encode_superblock(const AV1_COMP *const cpi, TileDataEnc *tile_data,
tx_size = (bsize > BLOCK_4X4) ? tx_size : TX_4X4;
}
mbmi->tx_size = tx_size;
- set_txfm_ctxs(tx_size, xd->n8_w, xd->n8_h,
+ set_txfm_ctxs(tx_size, xd->n4_w, xd->n4_h,
(mbmi->skip || seg_skip) && is_inter_block(mbmi), xd);
}
CFL_CTX *const cfl = &xd->cfl;