summaryrefslogtreecommitdiffstats
path: root/third_party/aom/av1/encoder/speed_features.h
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/aom/av1/encoder/speed_features.h')
-rw-r--r--third_party/aom/av1/encoder/speed_features.h253
1 files changed, 194 insertions, 59 deletions
diff --git a/third_party/aom/av1/encoder/speed_features.h b/third_party/aom/av1/encoder/speed_features.h
index edd79cd16..59cb6be58 100644
--- a/third_party/aom/av1/encoder/speed_features.h
+++ b/third_party/aom/av1/encoder/speed_features.h
@@ -20,64 +20,51 @@ extern "C" {
enum {
INTRA_ALL = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED) | (1 << D45_PRED) |
- (1 << D135_PRED) | (1 << D117_PRED) | (1 << D153_PRED) |
- (1 << D207_PRED) | (1 << D63_PRED) | (1 << SMOOTH_PRED) |
-#if CONFIG_SMOOTH_HV
- (1 << SMOOTH_V_PRED) | (1 << SMOOTH_H_PRED) |
-#endif // CONFIG_SMOOTH_HV
- (1 << TM_PRED),
-#if CONFIG_CFL
- UV_INTRA_ALL = (1 << UV_DC_PRED) | (1 << UV_V_PRED) | (1 << UV_H_PRED) |
- (1 << UV_D45_PRED) | (1 << UV_D135_PRED) |
- (1 << UV_D117_PRED) | (1 << UV_D153_PRED) |
- (1 << UV_D207_PRED) | (1 << UV_D63_PRED) |
- (1 << UV_SMOOTH_PRED) |
-#if CONFIG_SMOOTH_HV
- (1 << UV_SMOOTH_V_PRED) | (1 << UV_SMOOTH_H_PRED) |
-#endif // CONFIG_SMOOTH_HV
- (1 << UV_TM_PRED) | (1 << UV_CFL_PRED),
+ (1 << D135_PRED) | (1 << D113_PRED) | (1 << D157_PRED) |
+ (1 << D203_PRED) | (1 << D67_PRED) | (1 << SMOOTH_PRED) |
+ (1 << SMOOTH_V_PRED) | (1 << SMOOTH_H_PRED) | (1 << PAETH_PRED),
+ UV_INTRA_ALL =
+ (1 << UV_DC_PRED) | (1 << UV_V_PRED) | (1 << UV_H_PRED) |
+ (1 << UV_D45_PRED) | (1 << UV_D135_PRED) | (1 << UV_D113_PRED) |
+ (1 << UV_D157_PRED) | (1 << UV_D203_PRED) | (1 << UV_D67_PRED) |
+ (1 << UV_SMOOTH_PRED) | (1 << UV_SMOOTH_V_PRED) |
+ (1 << UV_SMOOTH_H_PRED) | (1 << UV_PAETH_PRED) | (1 << UV_CFL_PRED),
UV_INTRA_DC = (1 << UV_DC_PRED),
UV_INTRA_DC_CFL = (1 << UV_DC_PRED) | (1 << UV_CFL_PRED),
- UV_INTRA_DC_TM = (1 << UV_DC_PRED) | (1 << UV_TM_PRED),
- UV_INTRA_DC_TM_CFL =
- (1 << UV_DC_PRED) | (1 << UV_TM_PRED) | (1 << UV_CFL_PRED),
+ UV_INTRA_DC_TM = (1 << UV_DC_PRED) | (1 << UV_PAETH_PRED),
+ UV_INTRA_DC_PAETH_CFL =
+ (1 << UV_DC_PRED) | (1 << UV_PAETH_PRED) | (1 << UV_CFL_PRED),
UV_INTRA_DC_H_V = (1 << UV_DC_PRED) | (1 << UV_V_PRED) | (1 << UV_H_PRED),
UV_INTRA_DC_H_V_CFL = (1 << UV_DC_PRED) | (1 << UV_V_PRED) |
(1 << UV_H_PRED) | (1 << UV_CFL_PRED),
- UV_INTRA_DC_TM_H_V = (1 << UV_DC_PRED) | (1 << UV_TM_PRED) |
- (1 << UV_V_PRED) | (1 << UV_H_PRED),
- UV_INTRA_DC_TM_H_V_CFL = (1 << UV_DC_PRED) | (1 << UV_TM_PRED) |
- (1 << UV_V_PRED) | (1 << UV_H_PRED) |
- (1 << UV_CFL_PRED),
-#endif // CONFIG_CFL
+ UV_INTRA_DC_PAETH_H_V = (1 << UV_DC_PRED) | (1 << UV_PAETH_PRED) |
+ (1 << UV_V_PRED) | (1 << UV_H_PRED),
+ UV_INTRA_DC_PAETH_H_V_CFL = (1 << UV_DC_PRED) | (1 << UV_PAETH_PRED) |
+ (1 << UV_V_PRED) | (1 << UV_H_PRED) |
+ (1 << UV_CFL_PRED),
INTRA_DC = (1 << DC_PRED),
- INTRA_DC_TM = (1 << DC_PRED) | (1 << TM_PRED),
+ INTRA_DC_TM = (1 << DC_PRED) | (1 << PAETH_PRED),
INTRA_DC_H_V = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED),
- INTRA_DC_TM_H_V =
- (1 << DC_PRED) | (1 << TM_PRED) | (1 << V_PRED) | (1 << H_PRED)
+ INTRA_DC_PAETH_H_V =
+ (1 << DC_PRED) | (1 << PAETH_PRED) | (1 << V_PRED) | (1 << H_PRED)
};
enum {
-#if CONFIG_COMPOUND_SINGLEREF
-// TODO(zoeliu): To further consider following single ref comp modes:
-// SR_NEAREST_NEARMV, SR_NEAREST_NEWMV, SR_NEAR_NEWMV,
-// SR_ZERO_NEWMV, and SR_NEW_NEWMV.
-#endif // CONFIG_COMPOUND_SINGLEREF
- INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | (1 << NEWMV) |
- (1 << NEAREST_NEARESTMV) | (1 << NEAR_NEARMV) | (1 << NEW_NEWMV) |
- (1 << NEAREST_NEWMV) | (1 << NEAR_NEWMV) | (1 << NEW_NEARMV) |
- (1 << NEW_NEARESTMV) | (1 << ZERO_ZEROMV),
+ INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << GLOBALMV) |
+ (1 << NEWMV) | (1 << NEAREST_NEARESTMV) | (1 << NEAR_NEARMV) |
+ (1 << NEW_NEWMV) | (1 << NEAREST_NEWMV) | (1 << NEAR_NEWMV) |
+ (1 << NEW_NEARMV) | (1 << NEW_NEARESTMV) | (1 << GLOBAL_GLOBALMV),
INTER_NEAREST = (1 << NEARESTMV) | (1 << NEAREST_NEARESTMV) |
(1 << NEW_NEARESTMV) | (1 << NEAREST_NEWMV),
INTER_NEAREST_NEW = (1 << NEARESTMV) | (1 << NEWMV) |
(1 << NEAREST_NEARESTMV) | (1 << NEW_NEWMV) |
(1 << NEW_NEARESTMV) | (1 << NEAREST_NEWMV) |
(1 << NEW_NEARMV) | (1 << NEAR_NEWMV),
- INTER_NEAREST_ZERO = (1 << NEARESTMV) | (1 << ZEROMV) |
- (1 << NEAREST_NEARESTMV) | (1 << ZERO_ZEROMV) |
+ INTER_NEAREST_ZERO = (1 << NEARESTMV) | (1 << GLOBALMV) |
+ (1 << NEAREST_NEARESTMV) | (1 << GLOBAL_GLOBALMV) |
(1 << NEAREST_NEWMV) | (1 << NEW_NEARESTMV),
- INTER_NEAREST_NEW_ZERO = (1 << NEARESTMV) | (1 << ZEROMV) | (1 << NEWMV) |
- (1 << NEAREST_NEARESTMV) | (1 << ZERO_ZEROMV) |
+ INTER_NEAREST_NEW_ZERO = (1 << NEARESTMV) | (1 << GLOBALMV) | (1 << NEWMV) |
+ (1 << NEAREST_NEARESTMV) | (1 << GLOBAL_GLOBALMV) |
(1 << NEW_NEWMV) | (1 << NEW_NEARESTMV) |
(1 << NEAREST_NEWMV) | (1 << NEW_NEARMV) |
(1 << NEAR_NEWMV),
@@ -86,8 +73,8 @@ enum {
(1 << NEW_NEARESTMV) | (1 << NEAREST_NEWMV) |
(1 << NEW_NEARMV) | (1 << NEAR_NEWMV) |
(1 << NEAR_NEARMV),
- INTER_NEAREST_NEAR_ZERO = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) |
- (1 << NEAREST_NEARESTMV) | (1 << ZERO_ZEROMV) |
+ INTER_NEAREST_NEAR_ZERO = (1 << NEARESTMV) | (1 << NEARMV) | (1 << GLOBALMV) |
+ (1 << NEAREST_NEARESTMV) | (1 << GLOBAL_GLOBALMV) |
(1 << NEAREST_NEWMV) | (1 << NEW_NEARESTMV) |
(1 << NEW_NEARMV) | (1 << NEAR_NEWMV) |
(1 << NEAR_NEARMV),
@@ -106,6 +93,17 @@ enum {
};
typedef enum {
+ TXFM_CODING_SF = 1,
+ INTER_PRED_SF = 2,
+ INTRA_PRED_SF = 4,
+ PARTITION_SF = 8,
+ LOOP_FILTER_SF = 16,
+ RD_SKIP_SF = 32,
+ RESERVE_2_SF = 64,
+ RESERVE_3_SF = 128,
+} DEV_SPEED_FEATURES;
+
+typedef enum {
DIAMOND = 0,
NSTEP = 1,
HEX = 2,
@@ -141,8 +139,8 @@ typedef enum {
typedef enum {
USE_FULL_RD = 0,
+ USE_FAST_RD,
USE_LARGESTALL,
- USE_TX_8X8
} TX_SIZE_SEARCH_METHOD;
typedef enum {
@@ -190,10 +188,13 @@ typedef enum {
NO_PRUNE = 0,
// eliminates one tx type in vertical and horizontal direction
PRUNE_ONE = 1,
-#if CONFIG_EXT_TX
// eliminates two tx types in each direction
PRUNE_TWO = 2,
-#endif
+ // adaptively prunes the least promising tx types out of all 16
+ // (tuned to provide negligible quality loss)
+ PRUNE_2D_ACCURATE = 3,
+ // similar, but applies much more aggressive pruning to get better speed-up
+ PRUNE_2D_FAST = 4,
} TX_TYPE_PRUNE_MODE;
typedef struct {
@@ -204,6 +205,13 @@ typedef struct {
// Use a skip flag prediction model to detect blocks with skip = 1 early
// and avoid doing full TX type search for such blocks.
int use_skip_flag_prediction;
+
+ // Threshold used by the ML based method to predict TX block split decisions.
+ int ml_tx_split_thresh;
+
+ // Skip the remaining transform type search when the rdcost of skip is found
+ // to be better than that of applying the transform.
+ int skip_tx_search;
} TX_TYPE_SEARCH;
typedef enum {
@@ -261,13 +269,29 @@ typedef struct MESH_PATTERN {
int interval;
} MESH_PATTERN;
-#if CONFIG_GLOBAL_MOTION
typedef enum {
GM_FULL_SEARCH,
GM_REDUCED_REF_SEARCH,
GM_DISABLE_SEARCH
} GM_SEARCH_TYPE;
-#endif // CONFIG_GLOBAL_MOTION
+
+typedef enum {
+ GM_ERRORADV_TR_0,
+ GM_ERRORADV_TR_1,
+ GM_ERRORADV_TR_2,
+ GM_ERRORADV_TR_TYPES,
+} GM_ERRORADV_TYPE;
+
+typedef enum {
+ NO_TRELLIS_OPT, // No trellis optimization
+ FULL_TRELLIS_OPT, // Trellis optimization in all stages
+ FINAL_PASS_TRELLIS_OPT // Trellis optimization in only the final encode pass
+} TRELLIS_OPT_TYPE;
+
+typedef enum {
+ FULL_TXFM_RD,
+ LOW_TXFM_RD,
+} TXFM_RD_MODEL;
typedef struct SPEED_FEATURES {
MV_SPEED_FEATURES mv;
@@ -277,8 +301,11 @@ typedef struct SPEED_FEATURES {
RECODE_LOOP_TYPE recode_loop;
- // Trellis (dynamic programming) optimization of quantized values (+1, 0).
- int optimize_coefficients;
+ // Trellis (dynamic programming) optimization of quantized values
+ TRELLIS_OPT_TYPE optimize_coefficients;
+
+ // Global motion warp error threshold
+ GM_ERRORADV_TYPE gm_erroradv_type;
// Always set to 0. If on it enables 0 cost background transmission
// (except for the initial transmission of the segmentation). The feature is
@@ -287,6 +314,14 @@ typedef struct SPEED_FEATURES {
// adds overhead.
int static_segmentation;
+ // Limit the inter mode tested in the RD loop
+ int reduce_inter_modes;
+
+ // Do not compute the global motion parameters for a LAST2_FRAME or
+ // LAST3_FRAME if the GOLDEN_FRAME is closer and it has a non identity
+ // global model.
+ int selective_ref_gm;
+
// If 1 we iterate finding a best reference for 2 ref frames together - via
// a log search that iterates 4 times (check around mv for last for best
// error of combined predictor then check around mv for alt). If 0 we
@@ -309,6 +344,17 @@ typedef struct SPEED_FEATURES {
// for intra and model coefs for the rest.
TX_SIZE_SEARCH_METHOD tx_size_search_method;
+ // Init search depth for square and rectangular transform partitions.
+ // Values:
+ // 0 - search full tree, 1: search 1 level, 2: search the highest level only
+ int inter_tx_size_search_init_depth_sqr;
+ int inter_tx_size_search_init_depth_rect;
+ int intra_tx_size_search_init_depth_sqr;
+ int intra_tx_size_search_init_depth_rect;
+ // If any dimension of a coding block size is above 64, always search the
+ // largest transform only, since the largest transform block size is 64x64.
+ int tx_size_search_lgr_block;
+
// After looking at the first set of modes (set by index here), skip
// checking modes for reference frames that don't match the reference frame
// of the best so far.
@@ -318,9 +364,51 @@ typedef struct SPEED_FEATURES {
TX_TYPE_SEARCH tx_type_search;
+ // Skip split transform block partition when the collocated bigger block
+ // is selected as all zero coefficients.
+ int txb_split_cap;
+
+ // Shortcut the transform block partition and type search when the target
+ // rdcost is relatively lower.
+ // Values are 0 (not used), or 1 - 2 with progressively increasing
+ // aggressiveness
+ int adaptive_txb_search_level;
+
+ // Prune level for tx_size_type search for inter based on rd model
+ // 0: no pruning
+ // 1-2: progressively increasing aggressiveness of pruning
+ int model_based_prune_tx_search_level;
+
+ // Model based breakout after interpolation filter search
+ // 0: no breakout
+ // 1: use model based rd breakout
+ int model_based_post_interp_filter_breakout;
+
// Used if partition_search_type = FIXED_SIZE_PARTITION
BLOCK_SIZE always_this_block_size;
+ // Drop less likely to be picked reference frames in the RD search.
+ // Has three levels for now: 0, 1 and 2, where higher levels prune more
+ // aggressively than lower ones. (0 means no pruning).
+ int selective_ref_frame;
+
+ // Prune extended partition types search
+ // Can take values 0 - 2, 0 referring to no pruning, and 1 - 2 increasing
+ // aggressiveness of pruning in order.
+ int prune_ext_partition_types_search_level;
+
+ // Use a ML model to prune horz_a, horz_b, vert_a and vert_b partitions.
+ int ml_prune_ab_partition;
+
+ int fast_cdef_search;
+
+ // 2-pass coding block partition search
+ int two_pass_partition_search;
+
+ // Use the mode decisions made in the initial partition search to prune mode
+ // candidates, e.g. ref frames.
+ int mode_pruning_based_on_two_pass_partition_search;
+
// Skip rectangular partition test when partition type none gives better
// rd than partition type split.
int less_rectangular_check;
@@ -427,7 +515,7 @@ typedef struct SPEED_FEATURES {
// by only looking at counts from 1/2 the bands.
FAST_COEFF_UPDATE use_fast_coef_updates;
- // A binary mask indicating if NEARESTMV, NEARMV, ZEROMV, NEWMV
+ // A binary mask indicating if NEARESTMV, NEARMV, GLOBALMV, NEWMV
// modes are used in order from LSB to MSB for each BLOCK_SIZE.
int inter_mode_mask[BLOCK_SIZES_ALL];
@@ -456,10 +544,6 @@ typedef struct SPEED_FEATURES {
// default interp filter choice
InterpFilter default_interp_filter;
- // Early termination in transform size search, which only applies while
- // tx_size_search_method is USE_FULL_RD.
- int tx_size_search_breakout;
-
// adaptive interp_filter search to allow skip of certain filter types.
int adaptive_interp_filter_search;
@@ -476,16 +560,67 @@ typedef struct SPEED_FEATURES {
// Fast approximation of av1_model_rd_from_var_lapndz
int simple_model_rd_from_var;
- // Do sub-pixel search in up-sampled reference frames
- int use_upsampled_references;
+ // If true, sub-pixel search uses the exact convolve function used for final
+ // encoding and decoding; otherwise, it uses bilinear interpolation.
+ int use_accurate_subpel_search;
// Whether to compute distortion in the image domain (slower but
// more accurate), or in the transform domain (faster but less acurate).
+ // 0: use image domain
+ // 1: use transform domain in tx_type search, and use image domain for
+ // RD_STATS
+ // 2: use transform domain
int use_transform_domain_distortion;
-#if CONFIG_GLOBAL_MOTION
GM_SEARCH_TYPE gm_search_type;
-#endif // CONFIG_GLOBAL_MOTION
+
+ // Do limited interpolation filter search for dual filters, since best choice
+ // usually includes EIGHTTAP_REGULAR.
+ int use_fast_interpolation_filter_search;
+
+ // Save results of interpolation_filter_search for a block
+ // Check mv and ref_frames before searching; if they are the same as the
+ // previously saved results, the search can be skipped.
+ int skip_repeat_interpolation_filter_search;
+
+ // Use a hash table to store previously computed optimized qcoeffs from
+ // expensive calls to optimize_txb.
+ int use_hash_based_trellis;
+
+ // flag to drop some ref frames in compound motion search
+ int drop_ref;
+
+ // flag to allow skipping intra mode for inter frame prediction
+ int skip_intra_in_interframe;
+
+ // Use hash table to store intra(keyframe only) txb transform search results
+ // to avoid repeated search on the same residue signal.
+ int use_intra_txb_hash;
+
+ // Use hash table to store inter txb transform search results
+ // to avoid repeated search on the same residue signal.
+ int use_inter_txb_hash;
+
+ // Use hash table to store macroblock RD search results
+ // to avoid repeated search on the same residue signal.
+ int use_mb_rd_hash;
+
+ // Calculate RD cost before doing optimize_b, and skip if the cost is large.
+ int optimize_b_precheck;
+
+ // Use model rd instead of transform search in jnt_comp
+ int jnt_comp_fast_tx_search;
+
+ // Skip mv search in jnt_comp
+ int jnt_comp_skip_mv_search;
+
+ // Decoder side speed feature to add penalty for use of dual-sgr filters.
+ // Takes values 0 - 10, 0 indicating no penalty and each additional level
+ // adding a penalty of 1%
+ int dual_sgr_penalty_level;
+
+ // Dynamically estimate final rd from prediction error and mode cost
+ int inter_mode_rd_model_estimation;
} SPEED_FEATURES;
struct AV1_COMP;