diff options
Diffstat (limited to 'third_party/aom/av1/encoder/speed_features.h')
-rw-r--r-- | third_party/aom/av1/encoder/speed_features.h | 253 |
1 files changed, 194 insertions, 59 deletions
diff --git a/third_party/aom/av1/encoder/speed_features.h b/third_party/aom/av1/encoder/speed_features.h index edd79cd16..59cb6be58 100644 --- a/third_party/aom/av1/encoder/speed_features.h +++ b/third_party/aom/av1/encoder/speed_features.h @@ -20,64 +20,51 @@ extern "C" { enum { INTRA_ALL = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED) | (1 << D45_PRED) | - (1 << D135_PRED) | (1 << D117_PRED) | (1 << D153_PRED) | - (1 << D207_PRED) | (1 << D63_PRED) | (1 << SMOOTH_PRED) | -#if CONFIG_SMOOTH_HV - (1 << SMOOTH_V_PRED) | (1 << SMOOTH_H_PRED) | -#endif // CONFIG_SMOOTH_HV - (1 << TM_PRED), -#if CONFIG_CFL - UV_INTRA_ALL = (1 << UV_DC_PRED) | (1 << UV_V_PRED) | (1 << UV_H_PRED) | - (1 << UV_D45_PRED) | (1 << UV_D135_PRED) | - (1 << UV_D117_PRED) | (1 << UV_D153_PRED) | - (1 << UV_D207_PRED) | (1 << UV_D63_PRED) | - (1 << UV_SMOOTH_PRED) | -#if CONFIG_SMOOTH_HV - (1 << UV_SMOOTH_V_PRED) | (1 << UV_SMOOTH_H_PRED) | -#endif // CONFIG_SMOOTH_HV - (1 << UV_TM_PRED) | (1 << UV_CFL_PRED), + (1 << D135_PRED) | (1 << D113_PRED) | (1 << D157_PRED) | + (1 << D203_PRED) | (1 << D67_PRED) | (1 << SMOOTH_PRED) | + (1 << SMOOTH_V_PRED) | (1 << SMOOTH_H_PRED) | (1 << PAETH_PRED), + UV_INTRA_ALL = + (1 << UV_DC_PRED) | (1 << UV_V_PRED) | (1 << UV_H_PRED) | + (1 << UV_D45_PRED) | (1 << UV_D135_PRED) | (1 << UV_D113_PRED) | + (1 << UV_D157_PRED) | (1 << UV_D203_PRED) | (1 << UV_D67_PRED) | + (1 << UV_SMOOTH_PRED) | (1 << UV_SMOOTH_V_PRED) | + (1 << UV_SMOOTH_H_PRED) | (1 << UV_PAETH_PRED) | (1 << UV_CFL_PRED), UV_INTRA_DC = (1 << UV_DC_PRED), UV_INTRA_DC_CFL = (1 << UV_DC_PRED) | (1 << UV_CFL_PRED), - UV_INTRA_DC_TM = (1 << UV_DC_PRED) | (1 << UV_TM_PRED), - UV_INTRA_DC_TM_CFL = - (1 << UV_DC_PRED) | (1 << UV_TM_PRED) | (1 << UV_CFL_PRED), + UV_INTRA_DC_TM = (1 << UV_DC_PRED) | (1 << UV_PAETH_PRED), + UV_INTRA_DC_PAETH_CFL = + (1 << UV_DC_PRED) | (1 << UV_PAETH_PRED) | (1 << UV_CFL_PRED), UV_INTRA_DC_H_V = (1 << UV_DC_PRED) | (1 << UV_V_PRED) | (1 << UV_H_PRED), UV_INTRA_DC_H_V_CFL = (1 << UV_DC_PRED) | (1 << UV_V_PRED) | (1 << UV_H_PRED) | (1 << UV_CFL_PRED), - UV_INTRA_DC_TM_H_V = (1 << UV_DC_PRED) | (1 << UV_TM_PRED) | - (1 << UV_V_PRED) | (1 << UV_H_PRED), - UV_INTRA_DC_TM_H_V_CFL = (1 << UV_DC_PRED) | (1 << UV_TM_PRED) | - (1 << UV_V_PRED) | (1 << UV_H_PRED) | - (1 << UV_CFL_PRED), -#endif // CONFIG_CFL + UV_INTRA_DC_PAETH_H_V = (1 << UV_DC_PRED) | (1 << UV_PAETH_PRED) | + (1 << UV_V_PRED) | (1 << UV_H_PRED), + UV_INTRA_DC_PAETH_H_V_CFL = (1 << UV_DC_PRED) | (1 << UV_PAETH_PRED) | + (1 << UV_V_PRED) | (1 << UV_H_PRED) | + (1 << UV_CFL_PRED), INTRA_DC = (1 << DC_PRED), - INTRA_DC_TM = (1 << DC_PRED) | (1 << TM_PRED), + INTRA_DC_TM = (1 << DC_PRED) | (1 << PAETH_PRED), INTRA_DC_H_V = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED), - INTRA_DC_TM_H_V = - (1 << DC_PRED) | (1 << TM_PRED) | (1 << V_PRED) | (1 << H_PRED) + INTRA_DC_PAETH_H_V = + (1 << DC_PRED) | (1 << PAETH_PRED) | (1 << V_PRED) | (1 << H_PRED) }; enum { -#if CONFIG_COMPOUND_SINGLEREF -// TODO(zoeliu): To further consider following single ref comp modes: -// SR_NEAREST_NEARMV, SR_NEAREST_NEWMV, SR_NEAR_NEWMV, -// SR_ZERO_NEWMV, and SR_NEW_NEWMV. -#endif // CONFIG_COMPOUND_SINGLEREF - INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | (1 << NEWMV) | - (1 << NEAREST_NEARESTMV) | (1 << NEAR_NEARMV) | (1 << NEW_NEWMV) | - (1 << NEAREST_NEWMV) | (1 << NEAR_NEWMV) | (1 << NEW_NEARMV) | - (1 << NEW_NEARESTMV) | (1 << ZERO_ZEROMV), + INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << GLOBALMV) | + (1 << NEWMV) | (1 << NEAREST_NEARESTMV) | (1 << NEAR_NEARMV) | + (1 << NEW_NEWMV) | (1 << NEAREST_NEWMV) | (1 << NEAR_NEWMV) | + (1 << NEW_NEARMV) | (1 << NEW_NEARESTMV) | (1 << GLOBAL_GLOBALMV), INTER_NEAREST = (1 << NEARESTMV) | (1 << NEAREST_NEARESTMV) | (1 << NEW_NEARESTMV) | (1 << NEAREST_NEWMV), INTER_NEAREST_NEW = (1 << NEARESTMV) | (1 << NEWMV) | (1 << NEAREST_NEARESTMV) | (1 << NEW_NEWMV) | (1 << NEW_NEARESTMV) | (1 << NEAREST_NEWMV) | (1 << NEW_NEARMV) | (1 << NEAR_NEWMV), - INTER_NEAREST_ZERO = (1 << NEARESTMV) | (1 << ZEROMV) | - (1 << NEAREST_NEARESTMV) | (1 << ZERO_ZEROMV) | + INTER_NEAREST_ZERO = (1 << NEARESTMV) | (1 << GLOBALMV) | + (1 << NEAREST_NEARESTMV) | (1 << GLOBAL_GLOBALMV) | (1 << NEAREST_NEWMV) | (1 << NEW_NEARESTMV), - INTER_NEAREST_NEW_ZERO = (1 << NEARESTMV) | (1 << ZEROMV) | (1 << NEWMV) | - (1 << NEAREST_NEARESTMV) | (1 << ZERO_ZEROMV) | + INTER_NEAREST_NEW_ZERO = (1 << NEARESTMV) | (1 << GLOBALMV) | (1 << NEWMV) | + (1 << NEAREST_NEARESTMV) | (1 << GLOBAL_GLOBALMV) | (1 << NEW_NEWMV) | (1 << NEW_NEARESTMV) | (1 << NEAREST_NEWMV) | (1 << NEW_NEARMV) | (1 << NEAR_NEWMV), @@ -86,8 +73,8 @@ enum { (1 << NEW_NEARESTMV) | (1 << NEAREST_NEWMV) | (1 << NEW_NEARMV) | (1 << NEAR_NEWMV) | (1 << NEAR_NEARMV), - INTER_NEAREST_NEAR_ZERO = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | - (1 << NEAREST_NEARESTMV) | (1 << ZERO_ZEROMV) | + INTER_NEAREST_NEAR_ZERO = (1 << NEARESTMV) | (1 << NEARMV) | (1 << GLOBALMV) | + (1 << NEAREST_NEARESTMV) | (1 << GLOBAL_GLOBALMV) | (1 << NEAREST_NEWMV) | (1 << NEW_NEARESTMV) | (1 << NEW_NEARMV) | (1 << NEAR_NEWMV) | (1 << NEAR_NEARMV), @@ -106,6 +93,17 @@ enum { }; typedef enum { + TXFM_CODING_SF = 1, + INTER_PRED_SF = 2, + INTRA_PRED_SF = 4, + PARTITION_SF = 8, + LOOP_FILTER_SF = 16, + RD_SKIP_SF = 32, + RESERVE_2_SF = 64, + RESERVE_3_SF = 128, +} DEV_SPEED_FEATURES; + +typedef enum { DIAMOND = 0, NSTEP = 1, HEX = 2, @@ -141,8 +139,8 @@ typedef enum { typedef enum { USE_FULL_RD = 0, + USE_FAST_RD, USE_LARGESTALL, - USE_TX_8X8 } TX_SIZE_SEARCH_METHOD; typedef enum { @@ -190,10 +188,13 @@ typedef enum { NO_PRUNE = 0, // eliminates one tx type in vertical and horizontal direction PRUNE_ONE = 1, -#if CONFIG_EXT_TX // eliminates two tx types in each direction PRUNE_TWO = 2, -#endif + // adaptively prunes the least perspective tx types out of all 16 + // (tuned to provide negligible quality loss) + PRUNE_2D_ACCURATE = 3, + // similar, but applies much more aggressive pruning to get better speed-up + PRUNE_2D_FAST = 4, } TX_TYPE_PRUNE_MODE; typedef struct { @@ -204,6 +205,13 @@ typedef struct { // Use a skip flag prediction model to detect blocks with skip = 1 early // and avoid doing full TX type search for such blocks. int use_skip_flag_prediction; + + // Threshold used by the ML based method to predict TX block split decisions. + int ml_tx_split_thresh; + + // skip remaining transform type search when we found the rdcost of skip is + // better than applying transform + int skip_tx_search; } TX_TYPE_SEARCH; typedef enum { @@ -261,13 +269,29 @@ typedef struct MESH_PATTERN { int interval; } MESH_PATTERN; -#if CONFIG_GLOBAL_MOTION typedef enum { GM_FULL_SEARCH, GM_REDUCED_REF_SEARCH, GM_DISABLE_SEARCH } GM_SEARCH_TYPE; -#endif // CONFIG_GLOBAL_MOTION + +typedef enum { + GM_ERRORADV_TR_0, + GM_ERRORADV_TR_1, + GM_ERRORADV_TR_2, + GM_ERRORADV_TR_TYPES, +} GM_ERRORADV_TYPE; + +typedef enum { + NO_TRELLIS_OPT, // No trellis optimization + FULL_TRELLIS_OPT, // Trellis optimization in all stages + FINAL_PASS_TRELLIS_OPT // Trellis optimization in only the final encode pass +} TRELLIS_OPT_TYPE; + +typedef enum { + FULL_TXFM_RD, + LOW_TXFM_RD, +} TXFM_RD_MODEL; typedef struct SPEED_FEATURES { MV_SPEED_FEATURES mv; @@ -277,8 +301,11 @@ typedef struct SPEED_FEATURES { RECODE_LOOP_TYPE recode_loop; - // Trellis (dynamic programming) optimization of quantized values (+1, 0). - int optimize_coefficients; + // Trellis (dynamic programming) optimization of quantized values + TRELLIS_OPT_TYPE optimize_coefficients; + + // Global motion warp error threshold + GM_ERRORADV_TYPE gm_erroradv_type; // Always set to 0. If on it enables 0 cost background transmission // (except for the initial transmission of the segmentation). The feature is @@ -287,6 +314,14 @@ typedef struct SPEED_FEATURES { // adds overhead. int static_segmentation; + // Limit the inter mode tested in the RD loop + int reduce_inter_modes; + + // Do not compute the global motion parameters for a LAST2_FRAME or + // LAST3_FRAME if the GOLDEN_FRAME is closer and it has a non identity + // global model. + int selective_ref_gm; + // If 1 we iterate finding a best reference for 2 ref frames together - via // a log search that iterates 4 times (check around mv for last for best // error of combined predictor then check around mv for alt). If 0 we @@ -309,6 +344,17 @@ typedef struct SPEED_FEATURES { // for intra and model coefs for the rest. TX_SIZE_SEARCH_METHOD tx_size_search_method; + // Init search depth for square and rectangular transform partitions. + // Values: + // 0 - search full tree, 1: search 1 level, 2: search the highest level only + int inter_tx_size_search_init_depth_sqr; + int inter_tx_size_search_init_depth_rect; + int intra_tx_size_search_init_depth_sqr; + int intra_tx_size_search_init_depth_rect; + // If any dimension of a coding block size above 64, always search the + // largest transform only, since the largest transform block size is 64x64. + int tx_size_search_lgr_block; + // After looking at the first set of modes (set by index here), skip // checking modes for reference frames that don't match the reference frame // of the best so far. @@ -318,9 +364,51 @@ typedef struct SPEED_FEATURES { TX_TYPE_SEARCH tx_type_search; + // Skip split transform block partition when the collocated bigger block + // is selected as all zero coefficients. + int txb_split_cap; + + // Shortcut the transform block partition and type search when the target + // rdcost is relatively lower. + // Values are 0 (not used) , or 1 - 2 with progressively increasing + // aggressiveness + int adaptive_txb_search_level; + + // Prune level for tx_size_type search for inter based on rd model + // 0: no pruning + // 1-2: progressively increasing aggressiveness of pruning + int model_based_prune_tx_search_level; + + // Model based breakout after interpolation filter search + // 0: no breakout + // 1: use model based rd breakout + int model_based_post_interp_filter_breakout; + // Used if partition_search_type = FIXED_SIZE_PARTITION BLOCK_SIZE always_this_block_size; + // Drop less likely to be picked reference frames in the RD search. + // Has three levels for now: 0, 1 and 2, where higher levels prune more + // aggressively than lower ones. (0 means no pruning). + int selective_ref_frame; + + // Prune extended partition types search + // Can take values 0 - 2, 0 referring to no pruning, and 1 - 2 increasing + // aggressiveness of pruning in order. + int prune_ext_partition_types_search_level; + + // Use a ML model to prune horz_a, horz_b, vert_a and vert_b partitions. + int ml_prune_ab_partition; + + int fast_cdef_search; + + // 2-pass coding block partition search + int two_pass_partition_search; + + // Use the mode decisions made in the initial partition search to prune mode + // candidates, e.g. ref frames. + int mode_pruning_based_on_two_pass_partition_search; + // Skip rectangular partition test when partition type none gives better // rd than partition type split. int less_rectangular_check; @@ -427,7 +515,7 @@ typedef struct SPEED_FEATURES { // by only looking at counts from 1/2 the bands. FAST_COEFF_UPDATE use_fast_coef_updates; - // A binary mask indicating if NEARESTMV, NEARMV, ZEROMV, NEWMV + // A binary mask indicating if NEARESTMV, NEARMV, GLOBALMV, NEWMV // modes are used in order from LSB to MSB for each BLOCK_SIZE. int inter_mode_mask[BLOCK_SIZES_ALL]; @@ -456,10 +544,6 @@ typedef struct SPEED_FEATURES { // default interp filter choice InterpFilter default_interp_filter; - // Early termination in transform size search, which only applies while - // tx_size_search_method is USE_FULL_RD. - int tx_size_search_breakout; - // adaptive interp_filter search to allow skip of certain filter types. int adaptive_interp_filter_search; @@ -476,16 +560,67 @@ typedef struct SPEED_FEATURES { // Fast approximation of av1_model_rd_from_var_lapndz int simple_model_rd_from_var; - // Do sub-pixel search in up-sampled reference frames - int use_upsampled_references; + // If true, sub-pixel search uses the exact convolve function used for final + // encoding and decoding; otherwise, it uses bilinear interpolation. + int use_accurate_subpel_search; // Whether to compute distortion in the image domain (slower but // more accurate), or in the transform domain (faster but less acurate). + // 0: use image domain + // 1: use transform domain in tx_type search, and use image domain for + // RD_STATS + // 2: use transform domain int use_transform_domain_distortion; -#if CONFIG_GLOBAL_MOTION GM_SEARCH_TYPE gm_search_type; -#endif // CONFIG_GLOBAL_MOTION + + // Do limited interpolation filter search for dual filters, since best choice + // usually includes EIGHTTAP_REGULAR. + int use_fast_interpolation_filter_search; + + // Save results of interpolation_filter_search for a block + // Check mv and ref_frames before search, if they are same with previous + // saved results, it can be skipped. + int skip_repeat_interpolation_filter_search; + + // Use a hash table to store previously computed optimized qcoeffs from + // expensive calls to optimize_txb. + int use_hash_based_trellis; + + // flag to drop some ref frames in compound motion search + int drop_ref; + + // flag to allow skipping intra mode for inter frame prediction + int skip_intra_in_interframe; + + // Use hash table to store intra(keyframe only) txb transform search results + // to avoid repeated search on the same residue signal. + int use_intra_txb_hash; + + // Use hash table to store inter txb transform search results + // to avoid repeated search on the same residue signal. + int use_inter_txb_hash; + + // Use hash table to store macroblock RD search results + // to avoid repeated search on the same residue signal. + int use_mb_rd_hash; + + // Calculate RD cost before doing optimize_b, and skip if the cost is large. + int optimize_b_precheck; + + // Use model rd instead of transform search in jnt_comp + int jnt_comp_fast_tx_search; + + // Skip mv search in jnt_comp + int jnt_comp_skip_mv_search; + + // Decoder side speed feature to add penalty for use of dual-sgr filters. + // Takes values 0 - 10, 0 indicating no penalty and each additional level + // adding a penalty of 1% + int dual_sgr_penalty_level; + + // Dynamically estimate final rd from prediction error and mode cost + int inter_mode_rd_model_estimation; } SPEED_FEATURES; struct AV1_COMP; |