From 277f2116b6660e9bbe7f5d67524be57eceb49b8b Mon Sep 17 00:00:00 2001 From: "Matt A. Tobin" Date: Tue, 7 Apr 2020 23:30:51 -0400 Subject: Move aom source to a sub-directory under media/libaom There is no damned reason to treat this differently than any other media lib given its license and there never was. --- third_party/aom/av1/encoder/aq_complexity.c | 172 - third_party/aom/av1/encoder/aq_complexity.h | 37 - third_party/aom/av1/encoder/aq_cyclicrefresh.c | 580 - third_party/aom/av1/encoder/aq_cyclicrefresh.h | 98 - third_party/aom/av1/encoder/aq_variance.c | 202 - third_party/aom/av1/encoder/aq_variance.h | 33 - .../aom/av1/encoder/arm/neon/quantize_neon.c | 118 - third_party/aom/av1/encoder/av1_fwd_txfm1d.c | 1885 --- third_party/aom/av1/encoder/av1_fwd_txfm1d.h | 49 - third_party/aom/av1/encoder/av1_fwd_txfm1d_cfg.h | 19 - third_party/aom/av1/encoder/av1_fwd_txfm2d.c | 431 - third_party/aom/av1/encoder/av1_quantize.c | 738 -- third_party/aom/av1/encoder/av1_quantize.h | 148 - third_party/aom/av1/encoder/bitstream.c | 3999 ------ third_party/aom/av1/encoder/bitstream.h | 51 - third_party/aom/av1/encoder/block.h | 452 - third_party/aom/av1/encoder/blockiness.c | 142 - third_party/aom/av1/encoder/context_tree.c | 215 - third_party/aom/av1/encoder/context_tree.h | 114 - third_party/aom/av1/encoder/corner_detect.c | 37 - third_party/aom/av1/encoder/corner_detect.h | 22 - third_party/aom/av1/encoder/corner_match.c | 191 - third_party/aom/av1/encoder/corner_match.h | 33 - third_party/aom/av1/encoder/cost.c | 46 - third_party/aom/av1/encoder/cost.h | 47 - third_party/aom/av1/encoder/dwt.c | 155 - third_party/aom/av1/encoder/dwt.h | 25 - third_party/aom/av1/encoder/encodeframe.c | 5739 --------- third_party/aom/av1/encoder/encodeframe.h | 47 - third_party/aom/av1/encoder/encodemb.c | 649 - third_party/aom/av1/encoder/encodemb.h | 96 - third_party/aom/av1/encoder/encodemv.c | 239 - third_party/aom/av1/encoder/encodemv.h | 55 - third_party/aom/av1/encoder/encoder.c | 
6437 ---------- third_party/aom/av1/encoder/encoder.h | 985 -- third_party/aom/av1/encoder/encodetxb.c | 2062 ---- third_party/aom/av1/encoder/encodetxb.h | 87 - third_party/aom/av1/encoder/ethread.c | 261 - third_party/aom/av1/encoder/ethread.h | 37 - third_party/aom/av1/encoder/extend.c | 188 - third_party/aom/av1/encoder/extend.h | 32 - third_party/aom/av1/encoder/firstpass.c | 3480 ------ third_party/aom/av1/encoder/firstpass.h | 208 - third_party/aom/av1/encoder/global_motion.c | 298 - third_party/aom/av1/encoder/global_motion.h | 64 - third_party/aom/av1/encoder/grain_test_vectors.h | 781 -- third_party/aom/av1/encoder/hash.c | 125 - third_party/aom/av1/encoder/hash.h | 52 - third_party/aom/av1/encoder/hash_motion.c | 482 - third_party/aom/av1/encoder/hash_motion.h | 78 - third_party/aom/av1/encoder/hybrid_fwd_txfm.c | 390 - third_party/aom/av1/encoder/hybrid_fwd_txfm.h | 31 - third_party/aom/av1/encoder/k_means_template.h | 123 - third_party/aom/av1/encoder/lookahead.c | 210 - third_party/aom/av1/encoder/lookahead.h | 106 - third_party/aom/av1/encoder/mathutils.h | 359 - third_party/aom/av1/encoder/mbgraph.c | 401 - third_party/aom/av1/encoder/mbgraph.h | 41 - third_party/aom/av1/encoder/mcomp.c | 2885 ----- third_party/aom/av1/encoder/mcomp.h | 161 - third_party/aom/av1/encoder/mips/msa/error_msa.c | 109 - third_party/aom/av1/encoder/mips/msa/fdct4x4_msa.c | 46 - .../aom/av1/encoder/mips/msa/temporal_filter_msa.c | 285 - third_party/aom/av1/encoder/ml.c | 73 - third_party/aom/av1/encoder/ml.h | 49 - third_party/aom/av1/encoder/palette.c | 154 - third_party/aom/av1/encoder/palette.h | 96 - .../aom/av1/encoder/partition_model_weights.h | 2448 ---- third_party/aom/av1/encoder/pickcdef.c | 526 - third_party/aom/av1/encoder/picklpf.c | 263 - third_party/aom/av1/encoder/picklpf.h | 30 - third_party/aom/av1/encoder/pickrst.c | 1362 --- third_party/aom/av1/encoder/pickrst.h | 46 - third_party/aom/av1/encoder/pustats.h | 198 - third_party/aom/av1/encoder/random.h | 
29 - third_party/aom/av1/encoder/ransac.c | 603 - third_party/aom/av1/encoder/ransac.h | 35 - .../aom/av1/encoder/rate_distortion_model_params.h | 591 - third_party/aom/av1/encoder/ratectrl.c | 1776 --- third_party/aom/av1/encoder/ratectrl.h | 295 - third_party/aom/av1/encoder/rd.c | 1512 --- third_party/aom/av1/encoder/rd.h | 464 - third_party/aom/av1/encoder/rdopt.c | 12199 ------------------- third_party/aom/av1/encoder/rdopt.h | 138 - third_party/aom/av1/encoder/reconinter_enc.c | 627 - third_party/aom/av1/encoder/reconinter_enc.h | 127 - third_party/aom/av1/encoder/segmentation.c | 244 - third_party/aom/av1/encoder/segmentation.h | 38 - third_party/aom/av1/encoder/speed_features.c | 564 - third_party/aom/av1/encoder/speed_features.h | 568 - third_party/aom/av1/encoder/temporal_filter.c | 602 - third_party/aom/av1/encoder/temporal_filter.h | 25 - third_party/aom/av1/encoder/tokenize.c | 248 - third_party/aom/av1/encoder/tokenize.h | 73 - .../aom/av1/encoder/tx_prune_model_weights.h | 1944 --- third_party/aom/av1/encoder/wedge_utils.c | 125 - .../aom/av1/encoder/x86/av1_fwd_txfm1d_sse4.c | 1217 -- .../aom/av1/encoder/x86/av1_fwd_txfm2d_avx2.c | 2068 ---- .../aom/av1/encoder/x86/av1_fwd_txfm2d_sse4.c | 365 - .../aom/av1/encoder/x86/av1_fwd_txfm_avx2.h | 103 - .../aom/av1/encoder/x86/av1_fwd_txfm_sse2.c | 2889 ----- .../aom/av1/encoder/x86/av1_fwd_txfm_sse2.h | 117 - .../aom/av1/encoder/x86/av1_highbd_quantize_avx2.c | 137 - .../aom/av1/encoder/x86/av1_highbd_quantize_sse4.c | 195 - .../aom/av1/encoder/x86/av1_quantize_avx2.c | 330 - .../aom/av1/encoder/x86/av1_quantize_sse2.c | 189 - .../av1/encoder/x86/av1_quantize_ssse3_x86_64.asm | 204 - .../aom/av1/encoder/x86/av1_ssim_opt_x86_64.asm | 222 - third_party/aom/av1/encoder/x86/av1_txfm1d_sse4.h | 142 - .../aom/av1/encoder/x86/corner_match_sse4.c | 103 - third_party/aom/av1/encoder/x86/dct_sse2.asm | 82 - third_party/aom/av1/encoder/x86/encodetxb_avx2.c | 130 - third_party/aom/av1/encoder/x86/encodetxb_sse2.c | 
505 - third_party/aom/av1/encoder/x86/encodetxb_sse4.c | 92 - .../aom/av1/encoder/x86/error_intrin_avx2.c | 88 - third_party/aom/av1/encoder/x86/error_sse2.asm | 79 - third_party/aom/av1/encoder/x86/hash_sse42.c | 51 - .../encoder/x86/highbd_block_error_intrin_sse2.c | 72 - .../aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c | 1783 --- third_party/aom/av1/encoder/x86/pickrst_avx2.c | 403 - third_party/aom/av1/encoder/x86/pickrst_sse4.c | 389 - .../av1/encoder/x86/temporal_filter_apply_sse2.asm | 217 - third_party/aom/av1/encoder/x86/wedge_utils_avx2.c | 215 - third_party/aom/av1/encoder/x86/wedge_utils_sse2.c | 254 - 124 files changed, 78381 deletions(-) delete mode 100644 third_party/aom/av1/encoder/aq_complexity.c delete mode 100644 third_party/aom/av1/encoder/aq_complexity.h delete mode 100644 third_party/aom/av1/encoder/aq_cyclicrefresh.c delete mode 100644 third_party/aom/av1/encoder/aq_cyclicrefresh.h delete mode 100644 third_party/aom/av1/encoder/aq_variance.c delete mode 100644 third_party/aom/av1/encoder/aq_variance.h delete mode 100644 third_party/aom/av1/encoder/arm/neon/quantize_neon.c delete mode 100644 third_party/aom/av1/encoder/av1_fwd_txfm1d.c delete mode 100644 third_party/aom/av1/encoder/av1_fwd_txfm1d.h delete mode 100644 third_party/aom/av1/encoder/av1_fwd_txfm1d_cfg.h delete mode 100644 third_party/aom/av1/encoder/av1_fwd_txfm2d.c delete mode 100644 third_party/aom/av1/encoder/av1_quantize.c delete mode 100644 third_party/aom/av1/encoder/av1_quantize.h delete mode 100644 third_party/aom/av1/encoder/bitstream.c delete mode 100644 third_party/aom/av1/encoder/bitstream.h delete mode 100644 third_party/aom/av1/encoder/block.h delete mode 100644 third_party/aom/av1/encoder/blockiness.c delete mode 100644 third_party/aom/av1/encoder/context_tree.c delete mode 100644 third_party/aom/av1/encoder/context_tree.h delete mode 100644 third_party/aom/av1/encoder/corner_detect.c delete mode 100644 third_party/aom/av1/encoder/corner_detect.h delete mode 100644 
third_party/aom/av1/encoder/corner_match.c delete mode 100644 third_party/aom/av1/encoder/corner_match.h delete mode 100644 third_party/aom/av1/encoder/cost.c delete mode 100644 third_party/aom/av1/encoder/cost.h delete mode 100644 third_party/aom/av1/encoder/dwt.c delete mode 100644 third_party/aom/av1/encoder/dwt.h delete mode 100644 third_party/aom/av1/encoder/encodeframe.c delete mode 100644 third_party/aom/av1/encoder/encodeframe.h delete mode 100644 third_party/aom/av1/encoder/encodemb.c delete mode 100644 third_party/aom/av1/encoder/encodemb.h delete mode 100644 third_party/aom/av1/encoder/encodemv.c delete mode 100644 third_party/aom/av1/encoder/encodemv.h delete mode 100644 third_party/aom/av1/encoder/encoder.c delete mode 100644 third_party/aom/av1/encoder/encoder.h delete mode 100644 third_party/aom/av1/encoder/encodetxb.c delete mode 100644 third_party/aom/av1/encoder/encodetxb.h delete mode 100644 third_party/aom/av1/encoder/ethread.c delete mode 100644 third_party/aom/av1/encoder/ethread.h delete mode 100644 third_party/aom/av1/encoder/extend.c delete mode 100644 third_party/aom/av1/encoder/extend.h delete mode 100644 third_party/aom/av1/encoder/firstpass.c delete mode 100644 third_party/aom/av1/encoder/firstpass.h delete mode 100644 third_party/aom/av1/encoder/global_motion.c delete mode 100644 third_party/aom/av1/encoder/global_motion.h delete mode 100644 third_party/aom/av1/encoder/grain_test_vectors.h delete mode 100644 third_party/aom/av1/encoder/hash.c delete mode 100644 third_party/aom/av1/encoder/hash.h delete mode 100644 third_party/aom/av1/encoder/hash_motion.c delete mode 100644 third_party/aom/av1/encoder/hash_motion.h delete mode 100644 third_party/aom/av1/encoder/hybrid_fwd_txfm.c delete mode 100644 third_party/aom/av1/encoder/hybrid_fwd_txfm.h delete mode 100644 third_party/aom/av1/encoder/k_means_template.h delete mode 100644 third_party/aom/av1/encoder/lookahead.c delete mode 100644 third_party/aom/av1/encoder/lookahead.h delete mode 
100644 third_party/aom/av1/encoder/mathutils.h delete mode 100644 third_party/aom/av1/encoder/mbgraph.c delete mode 100644 third_party/aom/av1/encoder/mbgraph.h delete mode 100644 third_party/aom/av1/encoder/mcomp.c delete mode 100644 third_party/aom/av1/encoder/mcomp.h delete mode 100644 third_party/aom/av1/encoder/mips/msa/error_msa.c delete mode 100644 third_party/aom/av1/encoder/mips/msa/fdct4x4_msa.c delete mode 100644 third_party/aom/av1/encoder/mips/msa/temporal_filter_msa.c delete mode 100644 third_party/aom/av1/encoder/ml.c delete mode 100644 third_party/aom/av1/encoder/ml.h delete mode 100644 third_party/aom/av1/encoder/palette.c delete mode 100644 third_party/aom/av1/encoder/palette.h delete mode 100644 third_party/aom/av1/encoder/partition_model_weights.h delete mode 100644 third_party/aom/av1/encoder/pickcdef.c delete mode 100644 third_party/aom/av1/encoder/picklpf.c delete mode 100644 third_party/aom/av1/encoder/picklpf.h delete mode 100644 third_party/aom/av1/encoder/pickrst.c delete mode 100644 third_party/aom/av1/encoder/pickrst.h delete mode 100644 third_party/aom/av1/encoder/pustats.h delete mode 100644 third_party/aom/av1/encoder/random.h delete mode 100644 third_party/aom/av1/encoder/ransac.c delete mode 100644 third_party/aom/av1/encoder/ransac.h delete mode 100644 third_party/aom/av1/encoder/rate_distortion_model_params.h delete mode 100644 third_party/aom/av1/encoder/ratectrl.c delete mode 100644 third_party/aom/av1/encoder/ratectrl.h delete mode 100644 third_party/aom/av1/encoder/rd.c delete mode 100644 third_party/aom/av1/encoder/rd.h delete mode 100644 third_party/aom/av1/encoder/rdopt.c delete mode 100644 third_party/aom/av1/encoder/rdopt.h delete mode 100644 third_party/aom/av1/encoder/reconinter_enc.c delete mode 100644 third_party/aom/av1/encoder/reconinter_enc.h delete mode 100644 third_party/aom/av1/encoder/segmentation.c delete mode 100644 third_party/aom/av1/encoder/segmentation.h delete mode 100644 
third_party/aom/av1/encoder/speed_features.c delete mode 100644 third_party/aom/av1/encoder/speed_features.h delete mode 100644 third_party/aom/av1/encoder/temporal_filter.c delete mode 100644 third_party/aom/av1/encoder/temporal_filter.h delete mode 100644 third_party/aom/av1/encoder/tokenize.c delete mode 100644 third_party/aom/av1/encoder/tokenize.h delete mode 100644 third_party/aom/av1/encoder/tx_prune_model_weights.h delete mode 100644 third_party/aom/av1/encoder/wedge_utils.c delete mode 100644 third_party/aom/av1/encoder/x86/av1_fwd_txfm1d_sse4.c delete mode 100644 third_party/aom/av1/encoder/x86/av1_fwd_txfm2d_avx2.c delete mode 100644 third_party/aom/av1/encoder/x86/av1_fwd_txfm2d_sse4.c delete mode 100644 third_party/aom/av1/encoder/x86/av1_fwd_txfm_avx2.h delete mode 100644 third_party/aom/av1/encoder/x86/av1_fwd_txfm_sse2.c delete mode 100644 third_party/aom/av1/encoder/x86/av1_fwd_txfm_sse2.h delete mode 100644 third_party/aom/av1/encoder/x86/av1_highbd_quantize_avx2.c delete mode 100644 third_party/aom/av1/encoder/x86/av1_highbd_quantize_sse4.c delete mode 100644 third_party/aom/av1/encoder/x86/av1_quantize_avx2.c delete mode 100644 third_party/aom/av1/encoder/x86/av1_quantize_sse2.c delete mode 100644 third_party/aom/av1/encoder/x86/av1_quantize_ssse3_x86_64.asm delete mode 100644 third_party/aom/av1/encoder/x86/av1_ssim_opt_x86_64.asm delete mode 100644 third_party/aom/av1/encoder/x86/av1_txfm1d_sse4.h delete mode 100644 third_party/aom/av1/encoder/x86/corner_match_sse4.c delete mode 100644 third_party/aom/av1/encoder/x86/dct_sse2.asm delete mode 100644 third_party/aom/av1/encoder/x86/encodetxb_avx2.c delete mode 100644 third_party/aom/av1/encoder/x86/encodetxb_sse2.c delete mode 100644 third_party/aom/av1/encoder/x86/encodetxb_sse4.c delete mode 100644 third_party/aom/av1/encoder/x86/error_intrin_avx2.c delete mode 100644 third_party/aom/av1/encoder/x86/error_sse2.asm delete mode 100644 third_party/aom/av1/encoder/x86/hash_sse42.c delete mode 
100644 third_party/aom/av1/encoder/x86/highbd_block_error_intrin_sse2.c delete mode 100644 third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c delete mode 100644 third_party/aom/av1/encoder/x86/pickrst_avx2.c delete mode 100644 third_party/aom/av1/encoder/x86/pickrst_sse4.c delete mode 100644 third_party/aom/av1/encoder/x86/temporal_filter_apply_sse2.asm delete mode 100644 third_party/aom/av1/encoder/x86/wedge_utils_avx2.c delete mode 100644 third_party/aom/av1/encoder/x86/wedge_utils_sse2.c (limited to 'third_party/aom/av1/encoder') diff --git a/third_party/aom/av1/encoder/aq_complexity.c b/third_party/aom/av1/encoder/aq_complexity.c deleted file mode 100644 index 80f8e2e66..000000000 --- a/third_party/aom/av1/encoder/aq_complexity.c +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include -#include - -#include "av1/encoder/aq_complexity.h" -#include "av1/encoder/aq_variance.h" -#include "av1/encoder/encodeframe.h" -#include "av1/common/seg_common.h" -#include "av1/encoder/segmentation.h" -#include "aom_dsp/aom_dsp_common.h" -#include "aom_ports/system_state.h" - -#define AQ_C_SEGMENTS 5 -#define DEFAULT_AQ2_SEG 3 // Neutral Q segment -#define AQ_C_STRENGTHS 3 -static const double aq_c_q_adj_factor[AQ_C_STRENGTHS][AQ_C_SEGMENTS] = { - { 1.75, 1.25, 1.05, 1.00, 0.90 }, - { 2.00, 1.50, 1.15, 1.00, 0.85 }, - { 2.50, 1.75, 1.25, 1.00, 0.80 } -}; -static const double aq_c_transitions[AQ_C_STRENGTHS][AQ_C_SEGMENTS] = { - { 0.15, 0.30, 0.55, 2.00, 100.0 }, - { 0.20, 0.40, 0.65, 2.00, 100.0 }, - { 0.25, 0.50, 0.75, 2.00, 100.0 } -}; -static const double aq_c_var_thresholds[AQ_C_STRENGTHS][AQ_C_SEGMENTS] = { - { -4.0, -3.0, -2.0, 100.00, 100.0 }, - { -3.5, -2.5, -1.5, 100.00, 100.0 }, - { -3.0, -2.0, -1.0, 100.00, 100.0 } -}; - -static int get_aq_c_strength(int q_index, aom_bit_depth_t bit_depth) { - // Approximate base quatizer (truncated to int) - const int base_quant = av1_ac_quant_Q3(q_index, 0, bit_depth) / 4; - return (base_quant > 10) + (base_quant > 25); -} - -void av1_setup_in_frame_q_adj(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - struct segmentation *const seg = &cm->seg; - int resolution_change = - cm->prev_frame && (cm->width != cm->prev_frame->width || - cm->height != cm->prev_frame->height); - - // Make SURE use of floating point in this function is safe. 
- aom_clear_system_state(); - - if (resolution_change) { - memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); - av1_clearall_segfeatures(seg); - av1_disable_segmentation(seg); - return; - } - - if (frame_is_intra_only(cm) || cm->error_resilient_mode || - cpi->refresh_alt_ref_frame || - (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) { - int segment; - const int aq_strength = - get_aq_c_strength(cm->base_qindex, cm->seq_params.bit_depth); - - // Clear down the segment map. - memset(cpi->segmentation_map, DEFAULT_AQ2_SEG, cm->mi_rows * cm->mi_cols); - - av1_clearall_segfeatures(seg); - - // Segmentation only makes sense if the target bits per SB is above a - // threshold. Below this the overheads will usually outweigh any benefit. - if (cpi->rc.sb64_target_rate < 256) { - av1_disable_segmentation(seg); - return; - } - - av1_enable_segmentation(seg); - - // Default segment "Q" feature is disabled so it defaults to the baseline Q. - av1_disable_segfeature(seg, DEFAULT_AQ2_SEG, SEG_LVL_ALT_Q); - - // Use some of the segments for in frame Q adjustment. - for (segment = 0; segment < AQ_C_SEGMENTS; ++segment) { - int qindex_delta; - - if (segment == DEFAULT_AQ2_SEG) continue; - - qindex_delta = av1_compute_qdelta_by_rate( - &cpi->rc, cm->frame_type, cm->base_qindex, - aq_c_q_adj_factor[aq_strength][segment], cm->seq_params.bit_depth); - - // For AQ complexity mode, we dont allow Q0 in a segment if the base - // Q is not 0. Q0 (lossless) implies 4x4 only and in AQ mode 2 a segment - // Q delta is sometimes applied without going back around the rd loop. - // This could lead to an illegal combination of partition size and q. 
- if ((cm->base_qindex != 0) && ((cm->base_qindex + qindex_delta) == 0)) { - qindex_delta = -cm->base_qindex + 1; - } - if ((cm->base_qindex + qindex_delta) > 0) { - av1_enable_segfeature(seg, segment, SEG_LVL_ALT_Q); - av1_set_segdata(seg, segment, SEG_LVL_ALT_Q, qindex_delta); - } - } - } -} - -#define DEFAULT_LV_THRESH 10.0 -#define MIN_DEFAULT_LV_THRESH 8.0 -// Select a segment for the current block. -// The choice of segment for a block depends on the ratio of the projected -// bits for the block vs a target average and its spatial complexity. -void av1_caq_select_segment(const AV1_COMP *cpi, MACROBLOCK *mb, BLOCK_SIZE bs, - int mi_row, int mi_col, int projected_rate) { - const AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - - const int mi_offset = mi_row * cm->mi_cols + mi_col; - const int xmis = AOMMIN(cm->mi_cols - mi_col, mi_size_wide[bs]); - const int ymis = AOMMIN(cm->mi_rows - mi_row, mi_size_high[bs]); - int x, y; - int i; - unsigned char segment; - - if (0) { - segment = DEFAULT_AQ2_SEG; - } else { - // Rate depends on fraction of a SB64 in frame (xmis * ymis / bw * bh). - // It is converted to bits << AV1_PROB_COST_SHIFT units. - const int64_t num = (int64_t)(cpi->rc.sb64_target_rate * xmis * ymis) - << AV1_PROB_COST_SHIFT; - const int denom = cm->seq_params.mib_size * cm->seq_params.mib_size; - const int target_rate = (int)(num / denom); - double logvar; - double low_var_thresh; - const int aq_strength = - get_aq_c_strength(cm->base_qindex, cm->seq_params.bit_depth); - - aom_clear_system_state(); - low_var_thresh = - (cpi->oxcf.pass == 2) - ? AOMMAX(exp(cpi->twopass.mb_av_energy), MIN_DEFAULT_LV_THRESH) - : DEFAULT_LV_THRESH; - - av1_setup_src_planes(mb, cpi->source, mi_row, mi_col, num_planes); - logvar = av1_log_block_var(cpi, mb, bs); - - segment = AQ_C_SEGMENTS - 1; // Just in case no break out below. 
- for (i = 0; i < AQ_C_SEGMENTS; ++i) { - // Test rate against a threshold value and variance against a threshold. - // Increasing segment number (higher variance and complexity) = higher Q. - if ((projected_rate < target_rate * aq_c_transitions[aq_strength][i]) && - (logvar < (low_var_thresh + aq_c_var_thresholds[aq_strength][i]))) { - segment = i; - break; - } - } - } - - // Fill in the entires in the segment map corresponding to this SB64. - for (y = 0; y < ymis; y++) { - for (x = 0; x < xmis; x++) { - cpi->segmentation_map[mi_offset + y * cm->mi_cols + x] = segment; - } - } -} diff --git a/third_party/aom/av1/encoder/aq_complexity.h b/third_party/aom/av1/encoder/aq_complexity.h deleted file mode 100644 index 3421d74c9..000000000 --- a/third_party/aom/av1/encoder/aq_complexity.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_ENCODER_AQ_COMPLEXITY_H_ -#define AOM_AV1_ENCODER_AQ_COMPLEXITY_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "av1/common/enums.h" - -struct AV1_COMP; -struct macroblock; - -// Select a segment for the current Block. -void av1_caq_select_segment(const struct AV1_COMP *cpi, struct macroblock *, - BLOCK_SIZE bs, int mi_row, int mi_col, - int projected_rate); - -// This function sets up a set of segments with delta Q values around -// the baseline frame quantizer. 
-void av1_setup_in_frame_q_adj(struct AV1_COMP *cpi); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_AQ_COMPLEXITY_H_ diff --git a/third_party/aom/av1/encoder/aq_cyclicrefresh.c b/third_party/aom/av1/encoder/aq_cyclicrefresh.c deleted file mode 100644 index f532d48da..000000000 --- a/third_party/aom/av1/encoder/aq_cyclicrefresh.c +++ /dev/null @@ -1,580 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include -#include - -#include "av1/common/seg_common.h" -#include "av1/encoder/aq_cyclicrefresh.h" -#include "av1/encoder/ratectrl.h" -#include "av1/encoder/segmentation.h" -#include "aom_dsp/aom_dsp_common.h" -#include "aom_ports/system_state.h" - -struct CYCLIC_REFRESH { - // Percentage of blocks per frame that are targeted as candidates - // for cyclic refresh. - int percent_refresh; - // Maximum q-delta as percentage of base q. - int max_qdelta_perc; - // Superblock starting index for cycling through the frame. - int sb_index; - // Controls how long block will need to wait to be refreshed again, in - // excess of the cycle time, i.e., in the case of all zero motion, block - // will be refreshed every (100/percent_refresh + time_for_refresh) frames. - int time_for_refresh; - // Target number of (8x8) blocks that are set for delta-q. - int target_num_seg_blocks; - // Actual number of (8x8) blocks that were applied delta-q. - int actual_num_seg1_blocks; - int actual_num_seg2_blocks; - // RD mult. parameters for segment 1. 
- int rdmult; - // Cyclic refresh map. - int8_t *map; - // Map of the last q a block was coded at. - uint8_t *last_coded_q_map; - // Thresholds applied to the projected rate/distortion of the coding block, - // when deciding whether block should be refreshed. - int64_t thresh_rate_sb; - int64_t thresh_dist_sb; - // Threshold applied to the motion vector (in units of 1/8 pel) of the - // coding block, when deciding whether block should be refreshed. - int16_t motion_thresh; - // Rate target ratio to set q delta. - double rate_ratio_qdelta; - // Boost factor for rate target ratio, for segment CR_SEGMENT_ID_BOOST2. - int rate_boost_fac; - double low_content_avg; - int qindex_delta[3]; -}; - -CYCLIC_REFRESH *av1_cyclic_refresh_alloc(int mi_rows, int mi_cols) { - size_t last_coded_q_map_size; - CYCLIC_REFRESH *const cr = aom_calloc(1, sizeof(*cr)); - if (cr == NULL) return NULL; - - cr->map = aom_calloc(mi_rows * mi_cols, sizeof(*cr->map)); - if (cr->map == NULL) { - av1_cyclic_refresh_free(cr); - return NULL; - } - last_coded_q_map_size = mi_rows * mi_cols * sizeof(*cr->last_coded_q_map); - cr->last_coded_q_map = aom_malloc(last_coded_q_map_size); - if (cr->last_coded_q_map == NULL) { - av1_cyclic_refresh_free(cr); - return NULL; - } - assert(MAXQ <= 255); - memset(cr->last_coded_q_map, MAXQ, last_coded_q_map_size); - - return cr; -} - -void av1_cyclic_refresh_free(CYCLIC_REFRESH *cr) { - if (cr != NULL) { - aom_free(cr->map); - aom_free(cr->last_coded_q_map); - aom_free(cr); - } -} - -// Check if we should turn off cyclic refresh based on bitrate condition. -static int apply_cyclic_refresh_bitrate(const AV1_COMMON *cm, - const RATE_CONTROL *rc) { - // Turn off cyclic refresh if bits available per frame is not sufficiently - // larger than bit cost of segmentation. Segment map bit cost should scale - // with number of seg blocks, so compare available bits to number of blocks. 
- // Average bits available per frame = avg_frame_bandwidth - // Number of (8x8) blocks in frame = mi_rows * mi_cols; - const float factor = 0.25; - const int number_blocks = cm->mi_rows * cm->mi_cols; - // The condition below corresponds to turning off at target bitrates: - // (at 30fps), ~12kbps for CIF, 36kbps for VGA, 100kps for HD/720p. - // Also turn off at very small frame sizes, to avoid too large fraction of - // superblocks to be refreshed per frame. Threshold below is less than QCIF. - if (rc->avg_frame_bandwidth < factor * number_blocks || - number_blocks / 64 < 5) - return 0; - else - return 1; -} - -// Check if this coding block, of size bsize, should be considered for refresh -// (lower-qp coding). Decision can be based on various factors, such as -// size of the coding block (i.e., below min_block size rejected), coding -// mode, and rate/distortion. -static int candidate_refresh_aq(const CYCLIC_REFRESH *cr, - const MB_MODE_INFO *mbmi, int64_t rate, - int64_t dist, int bsize) { - MV mv = mbmi->mv[0].as_mv; - // Reject the block for lower-qp coding if projected distortion - // is above the threshold, and any of the following is true: - // 1) mode uses large mv - // 2) mode is an intra-mode - // Otherwise accept for refresh. - if (dist > cr->thresh_dist_sb && - (mv.row > cr->motion_thresh || mv.row < -cr->motion_thresh || - mv.col > cr->motion_thresh || mv.col < -cr->motion_thresh || - !is_inter_block(mbmi))) - return CR_SEGMENT_ID_BASE; - else if (bsize >= BLOCK_16X16 && rate < cr->thresh_rate_sb && - is_inter_block(mbmi) && mbmi->mv[0].as_int == 0 && - cr->rate_boost_fac > 10) - // More aggressive delta-q for bigger blocks with zero motion. - return CR_SEGMENT_ID_BOOST2; - else - return CR_SEGMENT_ID_BOOST1; -} - -// Compute delta-q for the segment. 
-static int compute_deltaq(const AV1_COMP *cpi, int q, double rate_factor) { - const CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; - const RATE_CONTROL *const rc = &cpi->rc; - int deltaq = - av1_compute_qdelta_by_rate(rc, cpi->common.frame_type, q, rate_factor, - cpi->common.seq_params.bit_depth); - if ((-deltaq) > cr->max_qdelta_perc * q / 100) { - deltaq = -cr->max_qdelta_perc * q / 100; - } - return deltaq; -} - -// For the just encoded frame, estimate the bits, incorporating the delta-q -// from non-base segment. For now ignore effect of multiple segments -// (with different delta-q). Note this function is called in the postencode -// (called from rc_update_rate_correction_factors()). -int av1_cyclic_refresh_estimate_bits_at_q(const AV1_COMP *cpi, - double correction_factor) { - const AV1_COMMON *const cm = &cpi->common; - const CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; - int estimated_bits; - int mbs = cm->MBs; - int num8x8bl = mbs << 2; - // Weight for non-base segments: use actual number of blocks refreshed in - // previous/just encoded frame. Note number of blocks here is in 8x8 units. - double weight_segment1 = (double)cr->actual_num_seg1_blocks / num8x8bl; - double weight_segment2 = (double)cr->actual_num_seg2_blocks / num8x8bl; - // Take segment weighted average for estimated bits. - estimated_bits = - (int)((1.0 - weight_segment1 - weight_segment2) * - av1_estimate_bits_at_q(cm->frame_type, cm->base_qindex, mbs, - correction_factor, - cm->seq_params.bit_depth) + - weight_segment1 * av1_estimate_bits_at_q( - cm->frame_type, - cm->base_qindex + cr->qindex_delta[1], mbs, - correction_factor, cm->seq_params.bit_depth) + - weight_segment2 * av1_estimate_bits_at_q( - cm->frame_type, - cm->base_qindex + cr->qindex_delta[2], mbs, - correction_factor, cm->seq_params.bit_depth)); - return estimated_bits; -} - -// Prior to encoding the frame, estimate the bits per mb, for a given q = i and -// a corresponding delta-q (for segment 1). 
This function is called in the -// rc_regulate_q() to set the base qp index. -// Note: the segment map is set to either 0/CR_SEGMENT_ID_BASE (no refresh) or -// to 1/CR_SEGMENT_ID_BOOST1 (refresh) for each superblock, prior to encoding. -int av1_cyclic_refresh_rc_bits_per_mb(const AV1_COMP *cpi, int i, - double correction_factor) { - const AV1_COMMON *const cm = &cpi->common; - CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; - int bits_per_mb; - int num8x8bl = cm->MBs << 2; - // Weight for segment prior to encoding: take the average of the target - // number for the frame to be encoded and the actual from the previous frame. - double weight_segment = - (double)((cr->target_num_seg_blocks + cr->actual_num_seg1_blocks + - cr->actual_num_seg2_blocks) >> - 1) / - num8x8bl; - // Compute delta-q corresponding to qindex i. - int deltaq = compute_deltaq(cpi, i, cr->rate_ratio_qdelta); - // Take segment weighted average for bits per mb. - bits_per_mb = - (int)((1.0 - weight_segment) * - av1_rc_bits_per_mb(cm->frame_type, i, correction_factor, - cm->seq_params.bit_depth) + - weight_segment * av1_rc_bits_per_mb(cm->frame_type, i + deltaq, - correction_factor, - cm->seq_params.bit_depth)); - return bits_per_mb; -} - -// Prior to coding a given prediction block, of size bsize at (mi_row, mi_col), -// check if we should reset the segment_id, and update the cyclic_refresh map -// and segmentation map. 
-void av1_cyclic_refresh_update_segment(const AV1_COMP *cpi, - MB_MODE_INFO *const mbmi, int mi_row, - int mi_col, BLOCK_SIZE bsize, - int64_t rate, int64_t dist, int skip) { - const AV1_COMMON *const cm = &cpi->common; - CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; - const int bw = mi_size_wide[bsize]; - const int bh = mi_size_high[bsize]; - const int xmis = AOMMIN(cm->mi_cols - mi_col, bw); - const int ymis = AOMMIN(cm->mi_rows - mi_row, bh); - const int block_index = mi_row * cm->mi_cols + mi_col; - const int refresh_this_block = - candidate_refresh_aq(cr, mbmi, rate, dist, bsize); - // Default is to not update the refresh map. - int new_map_value = cr->map[block_index]; - int x = 0; - int y = 0; - - // If this block is labeled for refresh, check if we should reset the - // segment_id. - if (cyclic_refresh_segment_id_boosted(mbmi->segment_id)) { - mbmi->segment_id = refresh_this_block; - // Reset segment_id if will be skipped. - if (skip) mbmi->segment_id = CR_SEGMENT_ID_BASE; - } - - // Update the cyclic refresh map, to be used for setting segmentation map - // for the next frame. If the block will be refreshed this frame, mark it - // as clean. The magnitude of the -ve influences how long before we consider - // it for refresh again. - if (cyclic_refresh_segment_id_boosted(mbmi->segment_id)) { - new_map_value = -cr->time_for_refresh; - } else if (refresh_this_block) { - // Else if it is accepted as candidate for refresh, and has not already - // been refreshed (marked as 1) then mark it as a candidate for cleanup - // for future time (marked as 0), otherwise don't update it. - if (cr->map[block_index] == 1) new_map_value = 0; - } else { - // Leave it marked as block that is not candidate for refresh. - new_map_value = 1; - } - - // Update entries in the cyclic refresh map with new_map_value, and - // copy mbmi->segment_id into global segmentation map. 
- for (y = 0; y < ymis; y++) - for (x = 0; x < xmis; x++) { - int map_offset = block_index + y * cm->mi_cols + x; - cr->map[map_offset] = new_map_value; - cpi->segmentation_map[map_offset] = mbmi->segment_id; - // Inter skip blocks were clearly not coded at the current qindex, so - // don't update the map for them. For cases where motion is non-zero or - // the reference frame isn't the previous frame, the previous value in - // the map for this spatial location is not entirely correct. - if ((!is_inter_block(mbmi) || !skip) && - mbmi->segment_id <= CR_SEGMENT_ID_BOOST2) { - cr->last_coded_q_map[map_offset] = clamp( - cm->base_qindex + cr->qindex_delta[mbmi->segment_id], 0, MAXQ); - } else if (is_inter_block(mbmi) && skip && - mbmi->segment_id <= CR_SEGMENT_ID_BOOST2) { - cr->last_coded_q_map[map_offset] = - AOMMIN(clamp(cm->base_qindex + cr->qindex_delta[mbmi->segment_id], - 0, MAXQ), - cr->last_coded_q_map[map_offset]); - } - } -} - -// Update the actual number of blocks that were applied the segment delta q. -void av1_cyclic_refresh_postencode(AV1_COMP *const cpi) { - AV1_COMMON *const cm = &cpi->common; - CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; - unsigned char *const seg_map = cpi->segmentation_map; - int mi_row, mi_col; - cr->actual_num_seg1_blocks = 0; - cr->actual_num_seg2_blocks = 0; - for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) - for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) { - if (cyclic_refresh_segment_id(seg_map[mi_row * cm->mi_cols + mi_col]) == - CR_SEGMENT_ID_BOOST1) - cr->actual_num_seg1_blocks++; - else if (cyclic_refresh_segment_id( - seg_map[mi_row * cm->mi_cols + mi_col]) == - CR_SEGMENT_ID_BOOST2) - cr->actual_num_seg2_blocks++; - } -} - -// Set golden frame update interval, for 1 pass CBR mode. 
-void av1_cyclic_refresh_set_golden_update(AV1_COMP *const cpi) { - RATE_CONTROL *const rc = &cpi->rc; - CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; - // Set minimum gf_interval for GF update to a multiple (== 2) of refresh - // period. Depending on past encoding stats, GF flag may be reset and update - // may not occur until next baseline_gf_interval. - if (cr->percent_refresh > 0) - rc->baseline_gf_interval = 4 * (100 / cr->percent_refresh); - else - rc->baseline_gf_interval = 40; -} - -// Update some encoding stats (from the just encoded frame). If this frame's -// background has high motion, refresh the golden frame. Otherwise, if the -// golden reference is to be updated check if we should NOT update the golden -// ref. -void av1_cyclic_refresh_check_golden_update(AV1_COMP *const cpi) { - AV1_COMMON *const cm = &cpi->common; - CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; - int mi_row, mi_col; - double fraction_low = 0.0; - int low_content_frame = 0; - - MB_MODE_INFO **mi; - RATE_CONTROL *const rc = &cpi->rc; - const int rows = cm->mi_rows, cols = cm->mi_cols; - int cnt1 = 0, cnt2 = 0; - int force_gf_refresh = 0; - - for (mi_row = 0; mi_row < rows; mi_row++) { - mi = cm->mi_grid_visible + mi_row * cm->mi_stride; - - for (mi_col = 0; mi_col < cols; mi_col++) { - int16_t abs_mvr = mi[0]->mv[0].as_mv.row >= 0 - ? mi[0]->mv[0].as_mv.row - : -1 * mi[0]->mv[0].as_mv.row; - int16_t abs_mvc = mi[0]->mv[0].as_mv.col >= 0 - ? mi[0]->mv[0].as_mv.col - : -1 * mi[0]->mv[0].as_mv.col; - - // Calculate the motion of the background. - if (abs_mvr <= 16 && abs_mvc <= 16) { - cnt1++; - if (abs_mvr == 0 && abs_mvc == 0) cnt2++; - } - mi++; - - // Accumulate low_content_frame. - if (cr->map[mi_row * cols + mi_col] < 1) low_content_frame++; - } - } - - // For video conference clips, if the background has high motion in current - // frame because of the camera movement, set this frame as the golden frame. - // Use 70% and 5% as the thresholds for golden frame refreshing. 
- if (cnt1 * 10 > (70 * rows * cols) && cnt2 * 20 < cnt1) { - av1_cyclic_refresh_set_golden_update(cpi); - rc->frames_till_gf_update_due = rc->baseline_gf_interval; - - if (rc->frames_till_gf_update_due > rc->frames_to_key) - rc->frames_till_gf_update_due = rc->frames_to_key; - cpi->refresh_golden_frame = 1; - force_gf_refresh = 1; - } - - fraction_low = (double)low_content_frame / (rows * cols); - // Update average. - cr->low_content_avg = (fraction_low + 3 * cr->low_content_avg) / 4; - if (!force_gf_refresh && cpi->refresh_golden_frame == 1) { - // Don't update golden reference if the amount of low_content for the - // current encoded frame is small, or if the recursive average of the - // low_content over the update interval window falls below threshold. - if (fraction_low < 0.8 || cr->low_content_avg < 0.7) - cpi->refresh_golden_frame = 0; - // Reset for next internal. - cr->low_content_avg = fraction_low; - } -} - -// Update the segmentation map, and related quantities: cyclic refresh map, -// refresh sb_index, and target number of blocks to be refreshed. -// The map is set to either 0/CR_SEGMENT_ID_BASE (no refresh) or to -// 1/CR_SEGMENT_ID_BOOST1 (refresh) for each superblock. -// Blocks labeled as BOOST1 may later get set to BOOST2 (during the -// encoding of the superblock). -static void cyclic_refresh_update_map(AV1_COMP *const cpi) { - AV1_COMMON *const cm = &cpi->common; - CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; - unsigned char *const seg_map = cpi->segmentation_map; - int i, block_count, bl_index, sb_rows, sb_cols, sbs_in_frame; - int xmis, ymis, x, y; - memset(seg_map, CR_SEGMENT_ID_BASE, cm->mi_rows * cm->mi_cols); - sb_cols = - (cm->mi_cols + cm->seq_params.mib_size - 1) / cm->seq_params.mib_size; - sb_rows = - (cm->mi_rows + cm->seq_params.mib_size - 1) / cm->seq_params.mib_size; - sbs_in_frame = sb_cols * sb_rows; - // Number of target blocks to get the q delta (segment 1). 
- block_count = cr->percent_refresh * cm->mi_rows * cm->mi_cols / 100; - // Set the segmentation map: cycle through the superblocks, starting at - // cr->mb_index, and stopping when either block_count blocks have been found - // to be refreshed, or we have passed through whole frame. - if (cr->sb_index >= sbs_in_frame) cr->sb_index = 0; - assert(cr->sb_index < sbs_in_frame); - i = cr->sb_index; - cr->target_num_seg_blocks = 0; - do { - int sum_map = 0; - // Get the mi_row/mi_col corresponding to superblock index i. - int sb_row_index = (i / sb_cols); - int sb_col_index = i - sb_row_index * sb_cols; - int mi_row = sb_row_index * cm->seq_params.mib_size; - int mi_col = sb_col_index * cm->seq_params.mib_size; - int qindex_thresh = - cpi->oxcf.content == AOM_CONTENT_SCREEN - ? av1_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex) - : 0; - assert(mi_row >= 0 && mi_row < cm->mi_rows); - assert(mi_col >= 0 && mi_col < cm->mi_cols); - bl_index = mi_row * cm->mi_cols + mi_col; - // Loop through all MI blocks in superblock and update map. - xmis = AOMMIN(cm->mi_cols - mi_col, cm->seq_params.mib_size); - ymis = AOMMIN(cm->mi_rows - mi_row, cm->seq_params.mib_size); - for (y = 0; y < ymis; y++) { - for (x = 0; x < xmis; x++) { - const int bl_index2 = bl_index + y * cm->mi_cols + x; - // If the block is as a candidate for clean up then mark it - // for possible boost/refresh (segment 1). The segment id may get - // reset to 0 later if block gets coded anything other than GLOBALMV. - if (cr->map[bl_index2] == 0) { - if (cr->last_coded_q_map[bl_index2] > qindex_thresh) sum_map++; - } else if (cr->map[bl_index2] < 0) { - cr->map[bl_index2]++; - } - } - } - // Enforce constant segment over superblock. - // If segment is at least half of superblock, set to 1. 
- if (sum_map >= xmis * ymis / 2) { - for (y = 0; y < ymis; y++) - for (x = 0; x < xmis; x++) { - seg_map[bl_index + y * cm->mi_cols + x] = CR_SEGMENT_ID_BOOST1; - } - cr->target_num_seg_blocks += xmis * ymis; - } - i++; - if (i == sbs_in_frame) { - i = 0; - } - } while (cr->target_num_seg_blocks < block_count && i != cr->sb_index); - cr->sb_index = i; -} - -// Set cyclic refresh parameters. -void av1_cyclic_refresh_update_parameters(AV1_COMP *const cpi) { - const RATE_CONTROL *const rc = &cpi->rc; - const AV1_COMMON *const cm = &cpi->common; - CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; - cr->percent_refresh = 10; - cr->max_qdelta_perc = 50; - cr->time_for_refresh = 0; - // Use larger delta-qp (increase rate_ratio_qdelta) for first few (~4) - // periods of the refresh cycle, after a key frame. - if (rc->frames_since_key < 4 * cr->percent_refresh) - cr->rate_ratio_qdelta = 3.0; - else - cr->rate_ratio_qdelta = 2.0; - // Adjust some parameters for low resolutions at low bitrates. - if (cm->width <= 352 && cm->height <= 288 && rc->avg_frame_bandwidth < 3400) { - cr->motion_thresh = 4; - cr->rate_boost_fac = 10; - } else { - cr->motion_thresh = 32; - cr->rate_boost_fac = 17; - } -} - -// Setup cyclic background refresh: set delta q and segmentation map. 
-void av1_cyclic_refresh_setup(AV1_COMP *const cpi) { - AV1_COMMON *const cm = &cpi->common; - const RATE_CONTROL *const rc = &cpi->rc; - CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; - struct segmentation *const seg = &cm->seg; - const int apply_cyclic_refresh = apply_cyclic_refresh_bitrate(cm, rc); - int resolution_change = - cm->prev_frame && (cm->width != cm->prev_frame->width || - cm->height != cm->prev_frame->height); - if (resolution_change) { - memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); - av1_clearall_segfeatures(seg); - aom_clear_system_state(); - av1_disable_segmentation(seg); - return; - } - if (cm->current_video_frame == 0) cr->low_content_avg = 0.0; - // Don't apply refresh on key frame or enhancement layer frames. - if (!apply_cyclic_refresh || cm->frame_type == KEY_FRAME) { - // Set segmentation map to 0 and disable. - unsigned char *const seg_map = cpi->segmentation_map; - memset(seg_map, 0, cm->mi_rows * cm->mi_cols); - av1_disable_segmentation(&cm->seg); - if (cm->frame_type == KEY_FRAME) { - memset(cr->last_coded_q_map, MAXQ, - cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map)); - cr->sb_index = 0; - } - return; - } else { - int qindex_delta = 0; - int qindex2; - const double q = - av1_convert_qindex_to_q(cm->base_qindex, cm->seq_params.bit_depth); - aom_clear_system_state(); - // Set rate threshold to some multiple (set to 2 for now) of the target - // rate (target is given by sb64_target_rate and scaled by 256). - cr->thresh_rate_sb = ((int64_t)(rc->sb64_target_rate) << 8) << 2; - // Distortion threshold, quadratic in Q, scale factor to be adjusted. - // q will not exceed 457, so (q * q) is within 32bit; see: - // av1_convert_qindex_to_q(), av1_ac_quant(), ac_qlookup*[]. - cr->thresh_dist_sb = ((int64_t)(q * q)) << 2; - - // Set up segmentation. - // Clear down the segment map. 
- av1_enable_segmentation(&cm->seg); - av1_clearall_segfeatures(seg); - - // Note: setting temporal_update has no effect, as the seg-map coding method - // (temporal or spatial) is determined in - // av1_choose_segmap_coding_method(), - // based on the coding cost of each method. For error_resilient mode on the - // last_frame_seg_map is set to 0, so if temporal coding is used, it is - // relative to 0 previous map. - // seg->temporal_update = 0; - - // Segment BASE "Q" feature is disabled so it defaults to the baseline Q. - av1_disable_segfeature(seg, CR_SEGMENT_ID_BASE, SEG_LVL_ALT_Q); - // Use segment BOOST1 for in-frame Q adjustment. - av1_enable_segfeature(seg, CR_SEGMENT_ID_BOOST1, SEG_LVL_ALT_Q); - // Use segment BOOST2 for more aggressive in-frame Q adjustment. - av1_enable_segfeature(seg, CR_SEGMENT_ID_BOOST2, SEG_LVL_ALT_Q); - - // Set the q delta for segment BOOST1. - qindex_delta = compute_deltaq(cpi, cm->base_qindex, cr->rate_ratio_qdelta); - cr->qindex_delta[1] = qindex_delta; - - // Compute rd-mult for segment BOOST1. - qindex2 = clamp(cm->base_qindex + cm->y_dc_delta_q + qindex_delta, 0, MAXQ); - - cr->rdmult = av1_compute_rd_mult(cpi, qindex2); - - av1_set_segdata(seg, CR_SEGMENT_ID_BOOST1, SEG_LVL_ALT_Q, qindex_delta); - - // Set a more aggressive (higher) q delta for segment BOOST2. - qindex_delta = compute_deltaq( - cpi, cm->base_qindex, - AOMMIN(CR_MAX_RATE_TARGET_RATIO, - 0.1 * cr->rate_boost_fac * cr->rate_ratio_qdelta)); - cr->qindex_delta[2] = qindex_delta; - av1_set_segdata(seg, CR_SEGMENT_ID_BOOST2, SEG_LVL_ALT_Q, qindex_delta); - - // Update the segmentation and refresh map. 
- cyclic_refresh_update_map(cpi); - } -} - -int av1_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr) { - return cr->rdmult; -} - -void av1_cyclic_refresh_reset_resize(AV1_COMP *const cpi) { - const AV1_COMMON *const cm = &cpi->common; - CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; - memset(cr->map, 0, cm->mi_rows * cm->mi_cols); - cr->sb_index = 0; - cpi->refresh_golden_frame = 1; -} diff --git a/third_party/aom/av1/encoder/aq_cyclicrefresh.h b/third_party/aom/av1/encoder/aq_cyclicrefresh.h deleted file mode 100644 index b45781983..000000000 --- a/third_party/aom/av1/encoder/aq_cyclicrefresh.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_ENCODER_AQ_CYCLICREFRESH_H_ -#define AOM_AV1_ENCODER_AQ_CYCLICREFRESH_H_ - -#include "av1/common/blockd.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// The segment ids used in cyclic refresh: from base (no boost) to increasing -// boost (higher delta-qp). -#define CR_SEGMENT_ID_BASE 0 -#define CR_SEGMENT_ID_BOOST1 1 -#define CR_SEGMENT_ID_BOOST2 2 - -// Maximum rate target ratio for setting segment delta-qp. -#define CR_MAX_RATE_TARGET_RATIO 4.0 - -struct AV1_COMP; - -struct CYCLIC_REFRESH; -typedef struct CYCLIC_REFRESH CYCLIC_REFRESH; - -CYCLIC_REFRESH *av1_cyclic_refresh_alloc(int mi_rows, int mi_cols); - -void av1_cyclic_refresh_free(CYCLIC_REFRESH *cr); - -// Estimate the bits, incorporating the delta-q from segment 1, after encoding -// the frame. 
-int av1_cyclic_refresh_estimate_bits_at_q(const struct AV1_COMP *cpi, - double correction_factor); - -// Estimate the bits per mb, for a given q = i and a corresponding delta-q -// (for segment 1), prior to encoding the frame. -int av1_cyclic_refresh_rc_bits_per_mb(const struct AV1_COMP *cpi, int i, - double correction_factor); - -// Prior to coding a given prediction block, of size bsize at (mi_row, mi_col), -// check if we should reset the segment_id, and update the cyclic_refresh map -// and segmentation map. -void av1_cyclic_refresh_update_segment(const struct AV1_COMP *cpi, - MB_MODE_INFO *const mbmi, int mi_row, - int mi_col, BLOCK_SIZE bsize, - int64_t rate, int64_t dist, int skip); - -// Update the segmentation map, and related quantities: cyclic refresh map, -// refresh sb_index, and target number of blocks to be refreshed. -void av1_cyclic_refresh_update__map(struct AV1_COMP *const cpi); - -// Update the actual number of blocks that were applied the segment delta q. -void av1_cyclic_refresh_postencode(struct AV1_COMP *const cpi); - -// Set golden frame update interval, for 1 pass CBR mode. -void av1_cyclic_refresh_set_golden_update(struct AV1_COMP *const cpi); - -// Check if we should not update golden reference, based on past refresh stats. -void av1_cyclic_refresh_check_golden_update(struct AV1_COMP *const cpi); - -// Set/update global/frame level refresh parameters. -void av1_cyclic_refresh_update_parameters(struct AV1_COMP *const cpi); - -// Setup cyclic background refresh: set delta q and segmentation map. 
-void av1_cyclic_refresh_setup(struct AV1_COMP *const cpi); - -int av1_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr); - -void av1_cyclic_refresh_reset_resize(struct AV1_COMP *const cpi); - -static INLINE int cyclic_refresh_segment_id_boosted(int segment_id) { - return segment_id == CR_SEGMENT_ID_BOOST1 || - segment_id == CR_SEGMENT_ID_BOOST2; -} - -static INLINE int cyclic_refresh_segment_id(int segment_id) { - if (segment_id == CR_SEGMENT_ID_BOOST1) - return CR_SEGMENT_ID_BOOST1; - else if (segment_id == CR_SEGMENT_ID_BOOST2) - return CR_SEGMENT_ID_BOOST2; - else - return CR_SEGMENT_ID_BASE; -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_AQ_CYCLICREFRESH_H_ diff --git a/third_party/aom/av1/encoder/aq_variance.c b/third_party/aom/av1/encoder/aq_variance.c deleted file mode 100644 index 58f906bdc..000000000 --- a/third_party/aom/av1/encoder/aq_variance.c +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include - -#include "aom_ports/mem.h" - -#include "av1/encoder/aq_variance.h" -#include "av1/common/seg_common.h" -#include "av1/encoder/encodeframe.h" -#include "av1/encoder/ratectrl.h" -#include "av1/encoder/rd.h" -#include "av1/encoder/segmentation.h" -#include "av1/encoder/dwt.h" -#include "aom_ports/system_state.h" - -static const double rate_ratio[MAX_SEGMENTS] = { 2.2, 1.7, 1.3, 1.0, - 0.9, .8, .7, .6 }; - -static const double deltaq_rate_ratio[MAX_SEGMENTS] = { 2.5, 2.0, 1.5, 1.0, - 0.75, 1.0, 1.0, 1.0 }; -#define ENERGY_MIN (-4) -#define ENERGY_MAX (1) -#define ENERGY_SPAN (ENERGY_MAX - ENERGY_MIN + 1) -#define ENERGY_IN_BOUNDS(energy) \ - assert((energy) >= ENERGY_MIN && (energy) <= ENERGY_MAX) - -DECLARE_ALIGNED(16, static const uint8_t, av1_all_zeros[MAX_SB_SIZE]) = { 0 }; - -DECLARE_ALIGNED(16, static const uint16_t, - av1_highbd_all_zeros[MAX_SB_SIZE]) = { 0 }; - -static const int segment_id[ENERGY_SPAN] = { 0, 1, 1, 2, 3, 4 }; - -#define SEGMENT_ID(i) segment_id[(i)-ENERGY_MIN] - -void av1_vaq_frame_setup(AV1_COMP *cpi) { - AV1_COMMON *cm = &cpi->common; - struct segmentation *seg = &cm->seg; - int i; - - int resolution_change = - cm->prev_frame && (cm->width != cm->prev_frame->width || - cm->height != cm->prev_frame->height); - int avg_energy = (int)(cpi->twopass.mb_av_energy - 2); - double avg_ratio; - if (avg_energy > 7) avg_energy = 7; - if (avg_energy < 0) avg_energy = 0; - avg_ratio = rate_ratio[avg_energy]; - - if (resolution_change) { - memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); - av1_clearall_segfeatures(seg); - aom_clear_system_state(); - av1_disable_segmentation(seg); - return; - } - if (frame_is_intra_only(cm) || cm->error_resilient_mode || - cpi->refresh_alt_ref_frame || - (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) { - cpi->vaq_refresh = 1; - - av1_enable_segmentation(seg); - av1_clearall_segfeatures(seg); - - aom_clear_system_state(); - - for (i = 0; i < MAX_SEGMENTS; ++i) { - // Set up avg 
segment id to be 1.0 and adjust the other segments around - // it. - int qindex_delta = av1_compute_qdelta_by_rate( - &cpi->rc, cm->frame_type, cm->base_qindex, rate_ratio[i] / avg_ratio, - cm->seq_params.bit_depth); - - // We don't allow qindex 0 in a segment if the base value is not 0. - // Q index 0 (lossless) implies 4x4 encoding only and in AQ mode a segment - // Q delta is sometimes applied without going back around the rd loop. - // This could lead to an illegal combination of partition size and q. - if ((cm->base_qindex != 0) && ((cm->base_qindex + qindex_delta) == 0)) { - qindex_delta = -cm->base_qindex + 1; - } - - av1_set_segdata(seg, i, SEG_LVL_ALT_Q, qindex_delta); - av1_enable_segfeature(seg, i, SEG_LVL_ALT_Q); - } - } -} - -int av1_log_block_var(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) { - // This functions returns a score for the blocks local variance as calculated - // by: sum of the log of the (4x4 variances) of each subblock to the current - // block (x,bs) - // * 32 / number of pixels in the block_size. - // This is used for segmentation because to avoid situations in which a large - // block with a gentle gradient gets marked high variance even though each - // subblock has a low variance. This allows us to assign the same segment - // number for the same sorts of area regardless of how the partitioning goes. - - MACROBLOCKD *xd = &x->e_mbd; - double var = 0; - unsigned int sse; - int i, j; - - int right_overflow = - (xd->mb_to_right_edge < 0) ? ((-xd->mb_to_right_edge) >> 3) : 0; - int bottom_overflow = - (xd->mb_to_bottom_edge < 0) ? 
((-xd->mb_to_bottom_edge) >> 3) : 0; - - const int bw = MI_SIZE * mi_size_wide[bs] - right_overflow; - const int bh = MI_SIZE * mi_size_high[bs] - bottom_overflow; - - aom_clear_system_state(); - - for (i = 0; i < bh; i += 4) { - for (j = 0; j < bw; j += 4) { - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - var += - log(1.0 + cpi->fn_ptr[BLOCK_4X4].vf( - x->plane[0].src.buf + i * x->plane[0].src.stride + j, - x->plane[0].src.stride, - CONVERT_TO_BYTEPTR(av1_highbd_all_zeros), 0, &sse) / - 16); - } else { - var += - log(1.0 + cpi->fn_ptr[BLOCK_4X4].vf( - x->plane[0].src.buf + i * x->plane[0].src.stride + j, - x->plane[0].src.stride, av1_all_zeros, 0, &sse) / - 16); - } - } - } - // Use average of 4x4 log variance. The range for 8 bit 0 - 9.704121561. - var /= (bw / 4 * bh / 4); - if (var > 7) var = 7; - - aom_clear_system_state(); - return (int)(var); -} - -#define DEFAULT_E_MIDPOINT 10.0 - -unsigned int haar_ac_energy(MACROBLOCK *x, BLOCK_SIZE bs) { - MACROBLOCKD *xd = &x->e_mbd; - int stride = x->plane[0].src.stride; - uint8_t *buf = x->plane[0].src.buf; - const int bw = MI_SIZE * mi_size_wide[bs]; - const int bh = MI_SIZE * mi_size_high[bs]; - int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH; - - int var = 0; - for (int r = 0; r < bh; r += 8) - for (int c = 0; c < bw; c += 8) { - var += av1_haar_ac_sad_8x8_uint8_input(buf + c + r * stride, stride, hbd); - } - - return (unsigned int)((uint64_t)var * 256) >> num_pels_log2_lookup[bs]; -} - -double av1_log_block_wavelet_energy(MACROBLOCK *x, BLOCK_SIZE bs) { - unsigned int haar_sad = haar_ac_energy(x, bs); - aom_clear_system_state(); - return log(haar_sad + 1.0); -} - -int av1_block_wavelet_energy_level(const AV1_COMP *cpi, MACROBLOCK *x, - BLOCK_SIZE bs) { - double energy, energy_midpoint; - aom_clear_system_state(); - energy_midpoint = (cpi->oxcf.pass == 2) ? 
cpi->twopass.frame_avg_haar_energy - : DEFAULT_E_MIDPOINT; - energy = av1_log_block_wavelet_energy(x, bs) - energy_midpoint; - return clamp((int)round(energy), ENERGY_MIN, ENERGY_MAX); -} - -int av1_compute_deltaq_from_energy_level(const AV1_COMP *const cpi, - int block_var_level) { - int rate_level; - const AV1_COMMON *const cm = &cpi->common; - - if (DELTAQ_MODULATION == 1) { - ENERGY_IN_BOUNDS(block_var_level); - rate_level = SEGMENT_ID(block_var_level); - } else { - rate_level = block_var_level; - } - int qindex_delta = av1_compute_qdelta_by_rate( - &cpi->rc, cm->frame_type, cm->base_qindex, deltaq_rate_ratio[rate_level], - cm->seq_params.bit_depth); - - if ((cm->base_qindex != 0) && ((cm->base_qindex + qindex_delta) == 0)) { - qindex_delta = -cm->base_qindex + 1; - } - return qindex_delta; -} diff --git a/third_party/aom/av1/encoder/aq_variance.h b/third_party/aom/av1/encoder/aq_variance.h deleted file mode 100644 index 2d22b663e..000000000 --- a/third_party/aom/av1/encoder/aq_variance.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_ENCODER_AQ_VARIANCE_H_ -#define AOM_AV1_ENCODER_AQ_VARIANCE_H_ - -#include "av1/encoder/encoder.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void av1_vaq_frame_setup(AV1_COMP *cpi); - -int av1_log_block_var(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs); -int av1_compute_deltaq_from_energy_level(const AV1_COMP *const cpi, - int block_var_level); -int av1_block_wavelet_energy_level(const AV1_COMP *cpi, MACROBLOCK *x, - BLOCK_SIZE bs); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_AQ_VARIANCE_H_ diff --git a/third_party/aom/av1/encoder/arm/neon/quantize_neon.c b/third_party/aom/av1/encoder/arm/neon/quantize_neon.c deleted file mode 100644 index 36e7d3370..000000000 --- a/third_party/aom/av1/encoder/arm/neon/quantize_neon.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include - -#include - -#include "aom_mem/aom_mem.h" - -#include "av1/common/quant_common.h" -#include "av1/common/seg_common.h" - -#include "av1/encoder/av1_quantize.h" -#include "av1/encoder/encoder.h" -#include "av1/encoder/rd.h" - -void av1_quantize_fp_neon(const int16_t *coeff_ptr, intptr_t count, - int skip_block, const int16_t *zbin_ptr, - const int16_t *round_ptr, const int16_t *quant_ptr, - const int16_t *quant_shift_ptr, int16_t *qcoeff_ptr, - int16_t *dqcoeff_ptr, const int16_t *dequant_ptr, - uint16_t *eob_ptr, const int16_t *scan, - const int16_t *iscan) { - // TODO(jingning) Decide the need of these arguments after the - // quantization process is completed. - (void)zbin_ptr; - (void)quant_shift_ptr; - (void)scan; - - if (!skip_block) { - // Quantization pass: All coefficients with index >= zero_flag are - // skippable. Note: zero_flag can be zero. - int i; - const int16x8_t v_zero = vdupq_n_s16(0); - const int16x8_t v_one = vdupq_n_s16(1); - int16x8_t v_eobmax_76543210 = vdupq_n_s16(-1); - int16x8_t v_round = vmovq_n_s16(round_ptr[1]); - int16x8_t v_quant = vmovq_n_s16(quant_ptr[1]); - int16x8_t v_dequant = vmovq_n_s16(dequant_ptr[1]); - // adjust for dc - v_round = vsetq_lane_s16(round_ptr[0], v_round, 0); - v_quant = vsetq_lane_s16(quant_ptr[0], v_quant, 0); - v_dequant = vsetq_lane_s16(dequant_ptr[0], v_dequant, 0); - // process dc and the first seven ac coeffs - { - const int16x8_t v_iscan = vld1q_s16(&iscan[0]); - const int16x8_t v_coeff = vld1q_s16(&coeff_ptr[0]); - const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15); - const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero); - const int32x4_t v_tmp_lo = - vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant)); - const int32x4_t v_tmp_hi = - vmull_s16(vget_high_s16(v_tmp), vget_high_s16(v_quant)); - const int16x8_t v_tmp2 = - vcombine_s16(vshrn_n_s32(v_tmp_lo, 16), vshrn_n_s32(v_tmp_hi, 16)); - const uint16x8_t v_nz_mask = vceqq_s16(v_tmp2, v_zero); - const int16x8_t 
v_iscan_plus1 = vaddq_s16(v_iscan, v_one); - const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1); - const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign); - const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign); - const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant); - v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan); - vst1q_s16(&qcoeff_ptr[0], v_qcoeff); - vst1q_s16(&dqcoeff_ptr[0], v_dqcoeff); - v_round = vmovq_n_s16(round_ptr[1]); - v_quant = vmovq_n_s16(quant_ptr[1]); - v_dequant = vmovq_n_s16(dequant_ptr[1]); - } - // now process the rest of the ac coeffs - for (i = 8; i < count; i += 8) { - const int16x8_t v_iscan = vld1q_s16(&iscan[i]); - const int16x8_t v_coeff = vld1q_s16(&coeff_ptr[i]); - const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15); - const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero); - const int32x4_t v_tmp_lo = - vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant)); - const int32x4_t v_tmp_hi = - vmull_s16(vget_high_s16(v_tmp), vget_high_s16(v_quant)); - const int16x8_t v_tmp2 = - vcombine_s16(vshrn_n_s32(v_tmp_lo, 16), vshrn_n_s32(v_tmp_hi, 16)); - const uint16x8_t v_nz_mask = vceqq_s16(v_tmp2, v_zero); - const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, v_one); - const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1); - const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign); - const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign); - const int16x8_t v_dqcoeff = vmulq_s16(v_qcoeff, v_dequant); - v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan); - vst1q_s16(&qcoeff_ptr[i], v_qcoeff); - vst1q_s16(&dqcoeff_ptr[i], v_dqcoeff); - } - { - const int16x4_t v_eobmax_3210 = vmax_s16( - vget_low_s16(v_eobmax_76543210), vget_high_s16(v_eobmax_76543210)); - const int64x1_t v_eobmax_xx32 = - vshr_n_s64(vreinterpret_s64_s16(v_eobmax_3210), 32); - const int16x4_t v_eobmax_tmp = - vmax_s16(v_eobmax_3210, vreinterpret_s16_s64(v_eobmax_xx32)); - 
const int64x1_t v_eobmax_xxx3 = - vshr_n_s64(vreinterpret_s64_s16(v_eobmax_tmp), 16); - const int16x4_t v_eobmax_final = - vmax_s16(v_eobmax_tmp, vreinterpret_s16_s64(v_eobmax_xxx3)); - - *eob_ptr = (uint16_t)vget_lane_s16(v_eobmax_final, 0); - } - } else { - memset(qcoeff_ptr, 0, count * sizeof(int16_t)); - memset(dqcoeff_ptr, 0, count * sizeof(int16_t)); - *eob_ptr = 0; - } -} diff --git a/third_party/aom/av1/encoder/av1_fwd_txfm1d.c b/third_party/aom/av1/encoder/av1_fwd_txfm1d.c deleted file mode 100644 index 98505e0b1..000000000 --- a/third_party/aom/av1/encoder/av1_fwd_txfm1d.c +++ /dev/null @@ -1,1885 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include -#include "av1/encoder/av1_fwd_txfm1d.h" -#include "av1/common/av1_txfm.h" - -void av1_fdct4_new(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range) { - const int32_t size = 4; - const int32_t *cospi; - - int32_t stage = 0; - int32_t *bf0, *bf1; - int32_t step[4]; - - // stage 0; - av1_range_check_buf(stage, input, input, size, stage_range[stage]); - - // stage 1; - stage++; - bf1 = output; - bf1[0] = input[0] + input[3]; - bf1[1] = input[1] + input[2]; - bf1[2] = -input[2] + input[1]; - bf1[3] = -input[3] + input[0]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 2 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = output; - bf1 = step; - bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit); - bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit); - bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit); - bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit); - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 3 - stage++; - bf0 = step; - bf1 = output; - bf1[0] = bf0[0]; - bf1[1] = bf0[2]; - bf1[2] = bf0[1]; - bf1[3] = bf0[3]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); -} - -void av1_fdct8_new(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range) { - const int32_t size = 8; - const int32_t *cospi; - - int32_t stage = 0; - int32_t *bf0, *bf1; - int32_t step[8]; - - // stage 0; - av1_range_check_buf(stage, input, input, size, stage_range[stage]); - - // stage 1; - stage++; - bf1 = output; - bf1[0] = input[0] + input[7]; - bf1[1] = input[1] + input[6]; - bf1[2] = input[2] + input[5]; - bf1[3] = input[3] + input[4]; - bf1[4] = -input[4] + input[3]; - bf1[5] = -input[5] + input[2]; - bf1[6] = -input[6] + input[1]; - bf1[7] = -input[7] + input[0]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 2 - stage++; - cospi = 
cospi_arr(cos_bit); - bf0 = output; - bf1 = step; - bf1[0] = bf0[0] + bf0[3]; - bf1[1] = bf0[1] + bf0[2]; - bf1[2] = -bf0[2] + bf0[1]; - bf1[3] = -bf0[3] + bf0[0]; - bf1[4] = bf0[4]; - bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit); - bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit); - bf1[7] = bf0[7]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 3 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = step; - bf1 = output; - bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit); - bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit); - bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit); - bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit); - bf1[4] = bf0[4] + bf0[5]; - bf1[5] = -bf0[5] + bf0[4]; - bf1[6] = -bf0[6] + bf0[7]; - bf1[7] = bf0[7] + bf0[6]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 4 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = output; - bf1 = step; - bf1[0] = bf0[0]; - bf1[1] = bf0[1]; - bf1[2] = bf0[2]; - bf1[3] = bf0[3]; - bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit); - bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit); - bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit); - bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit); - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 5 - stage++; - bf0 = step; - bf1 = output; - bf1[0] = bf0[0]; - bf1[1] = bf0[4]; - bf1[2] = bf0[2]; - bf1[3] = bf0[6]; - bf1[4] = bf0[1]; - bf1[5] = bf0[5]; - bf1[6] = bf0[3]; - bf1[7] = bf0[7]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); -} - -void av1_fdct16_new(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range) { - const int32_t size = 16; - const int32_t *cospi; - - int32_t stage = 0; - int32_t *bf0, *bf1; - int32_t step[16]; - - // stage 0; - av1_range_check_buf(stage, 
input, input, size, stage_range[stage]); - - // stage 1; - stage++; - bf1 = output; - bf1[0] = input[0] + input[15]; - bf1[1] = input[1] + input[14]; - bf1[2] = input[2] + input[13]; - bf1[3] = input[3] + input[12]; - bf1[4] = input[4] + input[11]; - bf1[5] = input[5] + input[10]; - bf1[6] = input[6] + input[9]; - bf1[7] = input[7] + input[8]; - bf1[8] = -input[8] + input[7]; - bf1[9] = -input[9] + input[6]; - bf1[10] = -input[10] + input[5]; - bf1[11] = -input[11] + input[4]; - bf1[12] = -input[12] + input[3]; - bf1[13] = -input[13] + input[2]; - bf1[14] = -input[14] + input[1]; - bf1[15] = -input[15] + input[0]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 2 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = output; - bf1 = step; - bf1[0] = bf0[0] + bf0[7]; - bf1[1] = bf0[1] + bf0[6]; - bf1[2] = bf0[2] + bf0[5]; - bf1[3] = bf0[3] + bf0[4]; - bf1[4] = -bf0[4] + bf0[3]; - bf1[5] = -bf0[5] + bf0[2]; - bf1[6] = -bf0[6] + bf0[1]; - bf1[7] = -bf0[7] + bf0[0]; - bf1[8] = bf0[8]; - bf1[9] = bf0[9]; - bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit); - bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit); - bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit); - bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit); - bf1[14] = bf0[14]; - bf1[15] = bf0[15]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 3 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = step; - bf1 = output; - bf1[0] = bf0[0] + bf0[3]; - bf1[1] = bf0[1] + bf0[2]; - bf1[2] = -bf0[2] + bf0[1]; - bf1[3] = -bf0[3] + bf0[0]; - bf1[4] = bf0[4]; - bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit); - bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit); - bf1[7] = bf0[7]; - bf1[8] = bf0[8] + bf0[11]; - bf1[9] = bf0[9] + bf0[10]; - bf1[10] = -bf0[10] + bf0[9]; - bf1[11] = -bf0[11] + bf0[8]; - bf1[12] = -bf0[12] + bf0[15]; - bf1[13] = -bf0[13] + bf0[14]; - 
bf1[14] = bf0[14] + bf0[13]; - bf1[15] = bf0[15] + bf0[12]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 4 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = output; - bf1 = step; - bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit); - bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit); - bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit); - bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit); - bf1[4] = bf0[4] + bf0[5]; - bf1[5] = -bf0[5] + bf0[4]; - bf1[6] = -bf0[6] + bf0[7]; - bf1[7] = bf0[7] + bf0[6]; - bf1[8] = bf0[8]; - bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit); - bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit); - bf1[11] = bf0[11]; - bf1[12] = bf0[12]; - bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit); - bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit); - bf1[15] = bf0[15]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 5 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = step; - bf1 = output; - bf1[0] = bf0[0]; - bf1[1] = bf0[1]; - bf1[2] = bf0[2]; - bf1[3] = bf0[3]; - bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit); - bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit); - bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit); - bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit); - bf1[8] = bf0[8] + bf0[9]; - bf1[9] = -bf0[9] + bf0[8]; - bf1[10] = -bf0[10] + bf0[11]; - bf1[11] = bf0[11] + bf0[10]; - bf1[12] = bf0[12] + bf0[13]; - bf1[13] = -bf0[13] + bf0[12]; - bf1[14] = -bf0[14] + bf0[15]; - bf1[15] = bf0[15] + bf0[14]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 6 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = output; - bf1 = step; - bf1[0] = bf0[0]; - bf1[1] = bf0[1]; - bf1[2] = bf0[2]; - bf1[3] = bf0[3]; - bf1[4] = bf0[4]; - bf1[5] = bf0[5]; - bf1[6] = 
bf0[6]; - bf1[7] = bf0[7]; - bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit); - bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit); - bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit); - bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit); - bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit); - bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit); - bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit); - bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit); - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 7 - stage++; - bf0 = step; - bf1 = output; - bf1[0] = bf0[0]; - bf1[1] = bf0[8]; - bf1[2] = bf0[4]; - bf1[3] = bf0[12]; - bf1[4] = bf0[2]; - bf1[5] = bf0[10]; - bf1[6] = bf0[6]; - bf1[7] = bf0[14]; - bf1[8] = bf0[1]; - bf1[9] = bf0[9]; - bf1[10] = bf0[5]; - bf1[11] = bf0[13]; - bf1[12] = bf0[3]; - bf1[13] = bf0[11]; - bf1[14] = bf0[7]; - bf1[15] = bf0[15]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); -} - -void av1_fdct32_new(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range) { - const int32_t size = 32; - const int32_t *cospi; - - int32_t stage = 0; - int32_t *bf0, *bf1; - int32_t step[32]; - - // stage 0; - av1_range_check_buf(stage, input, input, size, stage_range[stage]); - - // stage 1; - stage++; - bf1 = output; - bf1[0] = input[0] + input[31]; - bf1[1] = input[1] + input[30]; - bf1[2] = input[2] + input[29]; - bf1[3] = input[3] + input[28]; - bf1[4] = input[4] + input[27]; - bf1[5] = input[5] + input[26]; - bf1[6] = input[6] + input[25]; - bf1[7] = input[7] + input[24]; - bf1[8] = input[8] + input[23]; - bf1[9] = input[9] + input[22]; - bf1[10] = input[10] + input[21]; - bf1[11] = input[11] + input[20]; - bf1[12] = input[12] + input[19]; - bf1[13] = input[13] + input[18]; - bf1[14] = input[14] + input[17]; - bf1[15] = input[15] + input[16]; - 
bf1[16] = -input[16] + input[15]; - bf1[17] = -input[17] + input[14]; - bf1[18] = -input[18] + input[13]; - bf1[19] = -input[19] + input[12]; - bf1[20] = -input[20] + input[11]; - bf1[21] = -input[21] + input[10]; - bf1[22] = -input[22] + input[9]; - bf1[23] = -input[23] + input[8]; - bf1[24] = -input[24] + input[7]; - bf1[25] = -input[25] + input[6]; - bf1[26] = -input[26] + input[5]; - bf1[27] = -input[27] + input[4]; - bf1[28] = -input[28] + input[3]; - bf1[29] = -input[29] + input[2]; - bf1[30] = -input[30] + input[1]; - bf1[31] = -input[31] + input[0]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 2 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = output; - bf1 = step; - bf1[0] = bf0[0] + bf0[15]; - bf1[1] = bf0[1] + bf0[14]; - bf1[2] = bf0[2] + bf0[13]; - bf1[3] = bf0[3] + bf0[12]; - bf1[4] = bf0[4] + bf0[11]; - bf1[5] = bf0[5] + bf0[10]; - bf1[6] = bf0[6] + bf0[9]; - bf1[7] = bf0[7] + bf0[8]; - bf1[8] = -bf0[8] + bf0[7]; - bf1[9] = -bf0[9] + bf0[6]; - bf1[10] = -bf0[10] + bf0[5]; - bf1[11] = -bf0[11] + bf0[4]; - bf1[12] = -bf0[12] + bf0[3]; - bf1[13] = -bf0[13] + bf0[2]; - bf1[14] = -bf0[14] + bf0[1]; - bf1[15] = -bf0[15] + bf0[0]; - bf1[16] = bf0[16]; - bf1[17] = bf0[17]; - bf1[18] = bf0[18]; - bf1[19] = bf0[19]; - bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit); - bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit); - bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit); - bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit); - bf1[24] = half_btf(cospi[32], bf0[24], cospi[32], bf0[23], cos_bit); - bf1[25] = half_btf(cospi[32], bf0[25], cospi[32], bf0[22], cos_bit); - bf1[26] = half_btf(cospi[32], bf0[26], cospi[32], bf0[21], cos_bit); - bf1[27] = half_btf(cospi[32], bf0[27], cospi[32], bf0[20], cos_bit); - bf1[28] = bf0[28]; - bf1[29] = bf0[29]; - bf1[30] = bf0[30]; - bf1[31] = bf0[31]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); 
- - // stage 3 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = step; - bf1 = output; - bf1[0] = bf0[0] + bf0[7]; - bf1[1] = bf0[1] + bf0[6]; - bf1[2] = bf0[2] + bf0[5]; - bf1[3] = bf0[3] + bf0[4]; - bf1[4] = -bf0[4] + bf0[3]; - bf1[5] = -bf0[5] + bf0[2]; - bf1[6] = -bf0[6] + bf0[1]; - bf1[7] = -bf0[7] + bf0[0]; - bf1[8] = bf0[8]; - bf1[9] = bf0[9]; - bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit); - bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit); - bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit); - bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit); - bf1[14] = bf0[14]; - bf1[15] = bf0[15]; - bf1[16] = bf0[16] + bf0[23]; - bf1[17] = bf0[17] + bf0[22]; - bf1[18] = bf0[18] + bf0[21]; - bf1[19] = bf0[19] + bf0[20]; - bf1[20] = -bf0[20] + bf0[19]; - bf1[21] = -bf0[21] + bf0[18]; - bf1[22] = -bf0[22] + bf0[17]; - bf1[23] = -bf0[23] + bf0[16]; - bf1[24] = -bf0[24] + bf0[31]; - bf1[25] = -bf0[25] + bf0[30]; - bf1[26] = -bf0[26] + bf0[29]; - bf1[27] = -bf0[27] + bf0[28]; - bf1[28] = bf0[28] + bf0[27]; - bf1[29] = bf0[29] + bf0[26]; - bf1[30] = bf0[30] + bf0[25]; - bf1[31] = bf0[31] + bf0[24]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 4 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = output; - bf1 = step; - bf1[0] = bf0[0] + bf0[3]; - bf1[1] = bf0[1] + bf0[2]; - bf1[2] = -bf0[2] + bf0[1]; - bf1[3] = -bf0[3] + bf0[0]; - bf1[4] = bf0[4]; - bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit); - bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit); - bf1[7] = bf0[7]; - bf1[8] = bf0[8] + bf0[11]; - bf1[9] = bf0[9] + bf0[10]; - bf1[10] = -bf0[10] + bf0[9]; - bf1[11] = -bf0[11] + bf0[8]; - bf1[12] = -bf0[12] + bf0[15]; - bf1[13] = -bf0[13] + bf0[14]; - bf1[14] = bf0[14] + bf0[13]; - bf1[15] = bf0[15] + bf0[12]; - bf1[16] = bf0[16]; - bf1[17] = bf0[17]; - bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit); - bf1[19] = 
half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit); - bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit); - bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit); - bf1[22] = bf0[22]; - bf1[23] = bf0[23]; - bf1[24] = bf0[24]; - bf1[25] = bf0[25]; - bf1[26] = half_btf(cospi[48], bf0[26], -cospi[16], bf0[21], cos_bit); - bf1[27] = half_btf(cospi[48], bf0[27], -cospi[16], bf0[20], cos_bit); - bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[19], cos_bit); - bf1[29] = half_btf(cospi[16], bf0[29], cospi[48], bf0[18], cos_bit); - bf1[30] = bf0[30]; - bf1[31] = bf0[31]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 5 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = step; - bf1 = output; - bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit); - bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit); - bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit); - bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit); - bf1[4] = bf0[4] + bf0[5]; - bf1[5] = -bf0[5] + bf0[4]; - bf1[6] = -bf0[6] + bf0[7]; - bf1[7] = bf0[7] + bf0[6]; - bf1[8] = bf0[8]; - bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit); - bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit); - bf1[11] = bf0[11]; - bf1[12] = bf0[12]; - bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit); - bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit); - bf1[15] = bf0[15]; - bf1[16] = bf0[16] + bf0[19]; - bf1[17] = bf0[17] + bf0[18]; - bf1[18] = -bf0[18] + bf0[17]; - bf1[19] = -bf0[19] + bf0[16]; - bf1[20] = -bf0[20] + bf0[23]; - bf1[21] = -bf0[21] + bf0[22]; - bf1[22] = bf0[22] + bf0[21]; - bf1[23] = bf0[23] + bf0[20]; - bf1[24] = bf0[24] + bf0[27]; - bf1[25] = bf0[25] + bf0[26]; - bf1[26] = -bf0[26] + bf0[25]; - bf1[27] = -bf0[27] + bf0[24]; - bf1[28] = -bf0[28] + bf0[31]; - bf1[29] = -bf0[29] + bf0[30]; - bf1[30] = bf0[30] + bf0[29]; 
- bf1[31] = bf0[31] + bf0[28]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 6 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = output; - bf1 = step; - bf1[0] = bf0[0]; - bf1[1] = bf0[1]; - bf1[2] = bf0[2]; - bf1[3] = bf0[3]; - bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit); - bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit); - bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit); - bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit); - bf1[8] = bf0[8] + bf0[9]; - bf1[9] = -bf0[9] + bf0[8]; - bf1[10] = -bf0[10] + bf0[11]; - bf1[11] = bf0[11] + bf0[10]; - bf1[12] = bf0[12] + bf0[13]; - bf1[13] = -bf0[13] + bf0[12]; - bf1[14] = -bf0[14] + bf0[15]; - bf1[15] = bf0[15] + bf0[14]; - bf1[16] = bf0[16]; - bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit); - bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit); - bf1[19] = bf0[19]; - bf1[20] = bf0[20]; - bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit); - bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit); - bf1[23] = bf0[23]; - bf1[24] = bf0[24]; - bf1[25] = half_btf(cospi[24], bf0[25], -cospi[40], bf0[22], cos_bit); - bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[21], cos_bit); - bf1[27] = bf0[27]; - bf1[28] = bf0[28]; - bf1[29] = half_btf(cospi[56], bf0[29], -cospi[8], bf0[18], cos_bit); - bf1[30] = half_btf(cospi[8], bf0[30], cospi[56], bf0[17], cos_bit); - bf1[31] = bf0[31]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 7 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = step; - bf1 = output; - bf1[0] = bf0[0]; - bf1[1] = bf0[1]; - bf1[2] = bf0[2]; - bf1[3] = bf0[3]; - bf1[4] = bf0[4]; - bf1[5] = bf0[5]; - bf1[6] = bf0[6]; - bf1[7] = bf0[7]; - bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit); - bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit); - bf1[10] = half_btf(cospi[44], 
bf0[10], cospi[20], bf0[13], cos_bit); - bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit); - bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit); - bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit); - bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit); - bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit); - bf1[16] = bf0[16] + bf0[17]; - bf1[17] = -bf0[17] + bf0[16]; - bf1[18] = -bf0[18] + bf0[19]; - bf1[19] = bf0[19] + bf0[18]; - bf1[20] = bf0[20] + bf0[21]; - bf1[21] = -bf0[21] + bf0[20]; - bf1[22] = -bf0[22] + bf0[23]; - bf1[23] = bf0[23] + bf0[22]; - bf1[24] = bf0[24] + bf0[25]; - bf1[25] = -bf0[25] + bf0[24]; - bf1[26] = -bf0[26] + bf0[27]; - bf1[27] = bf0[27] + bf0[26]; - bf1[28] = bf0[28] + bf0[29]; - bf1[29] = -bf0[29] + bf0[28]; - bf1[30] = -bf0[30] + bf0[31]; - bf1[31] = bf0[31] + bf0[30]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 8 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = output; - bf1 = step; - bf1[0] = bf0[0]; - bf1[1] = bf0[1]; - bf1[2] = bf0[2]; - bf1[3] = bf0[3]; - bf1[4] = bf0[4]; - bf1[5] = bf0[5]; - bf1[6] = bf0[6]; - bf1[7] = bf0[7]; - bf1[8] = bf0[8]; - bf1[9] = bf0[9]; - bf1[10] = bf0[10]; - bf1[11] = bf0[11]; - bf1[12] = bf0[12]; - bf1[13] = bf0[13]; - bf1[14] = bf0[14]; - bf1[15] = bf0[15]; - bf1[16] = half_btf(cospi[62], bf0[16], cospi[2], bf0[31], cos_bit); - bf1[17] = half_btf(cospi[30], bf0[17], cospi[34], bf0[30], cos_bit); - bf1[18] = half_btf(cospi[46], bf0[18], cospi[18], bf0[29], cos_bit); - bf1[19] = half_btf(cospi[14], bf0[19], cospi[50], bf0[28], cos_bit); - bf1[20] = half_btf(cospi[54], bf0[20], cospi[10], bf0[27], cos_bit); - bf1[21] = half_btf(cospi[22], bf0[21], cospi[42], bf0[26], cos_bit); - bf1[22] = half_btf(cospi[38], bf0[22], cospi[26], bf0[25], cos_bit); - bf1[23] = half_btf(cospi[6], bf0[23], cospi[58], bf0[24], cos_bit); - bf1[24] = half_btf(cospi[6], bf0[24], -cospi[58], bf0[23], 
cos_bit); - bf1[25] = half_btf(cospi[38], bf0[25], -cospi[26], bf0[22], cos_bit); - bf1[26] = half_btf(cospi[22], bf0[26], -cospi[42], bf0[21], cos_bit); - bf1[27] = half_btf(cospi[54], bf0[27], -cospi[10], bf0[20], cos_bit); - bf1[28] = half_btf(cospi[14], bf0[28], -cospi[50], bf0[19], cos_bit); - bf1[29] = half_btf(cospi[46], bf0[29], -cospi[18], bf0[18], cos_bit); - bf1[30] = half_btf(cospi[30], bf0[30], -cospi[34], bf0[17], cos_bit); - bf1[31] = half_btf(cospi[62], bf0[31], -cospi[2], bf0[16], cos_bit); - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 9 - stage++; - bf0 = step; - bf1 = output; - bf1[0] = bf0[0]; - bf1[1] = bf0[16]; - bf1[2] = bf0[8]; - bf1[3] = bf0[24]; - bf1[4] = bf0[4]; - bf1[5] = bf0[20]; - bf1[6] = bf0[12]; - bf1[7] = bf0[28]; - bf1[8] = bf0[2]; - bf1[9] = bf0[18]; - bf1[10] = bf0[10]; - bf1[11] = bf0[26]; - bf1[12] = bf0[6]; - bf1[13] = bf0[22]; - bf1[14] = bf0[14]; - bf1[15] = bf0[30]; - bf1[16] = bf0[1]; - bf1[17] = bf0[17]; - bf1[18] = bf0[9]; - bf1[19] = bf0[25]; - bf1[20] = bf0[5]; - bf1[21] = bf0[21]; - bf1[22] = bf0[13]; - bf1[23] = bf0[29]; - bf1[24] = bf0[3]; - bf1[25] = bf0[19]; - bf1[26] = bf0[11]; - bf1[27] = bf0[27]; - bf1[28] = bf0[7]; - bf1[29] = bf0[23]; - bf1[30] = bf0[15]; - bf1[31] = bf0[31]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); -} - -void av1_fadst4_new(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range) { - int bit = cos_bit; - const int32_t *sinpi = sinpi_arr(bit); - int32_t x0, x1, x2, x3; - int32_t s0, s1, s2, s3, s4, s5, s6, s7; - - // stage 0 - av1_range_check_buf(0, input, input, 4, stage_range[0]); - x0 = input[0]; - x1 = input[1]; - x2 = input[2]; - x3 = input[3]; - - if (!(x0 | x1 | x2 | x3)) { - output[0] = output[1] = output[2] = output[3] = 0; - return; - } - - // stage 1 - s0 = range_check_value(sinpi[1] * x0, bit + stage_range[1]); - s1 = range_check_value(sinpi[4] * x0, bit + stage_range[1]); - s2 = 
range_check_value(sinpi[2] * x1, bit + stage_range[1]); - s3 = range_check_value(sinpi[1] * x1, bit + stage_range[1]); - s4 = range_check_value(sinpi[3] * x2, bit + stage_range[1]); - s5 = range_check_value(sinpi[4] * x3, bit + stage_range[1]); - s6 = range_check_value(sinpi[2] * x3, bit + stage_range[1]); - s7 = range_check_value(x0 + x1, stage_range[1]); - - // stage 2 - s7 = range_check_value(s7 - x3, stage_range[2]); - - // stage 3 - x0 = range_check_value(s0 + s2, bit + stage_range[3]); - x1 = range_check_value(sinpi[3] * s7, bit + stage_range[3]); - x2 = range_check_value(s1 - s3, bit + stage_range[3]); - x3 = range_check_value(s4, bit + stage_range[3]); - - // stage 4 - x0 = range_check_value(x0 + s5, bit + stage_range[4]); - x2 = range_check_value(x2 + s6, bit + stage_range[4]); - - // stage 5 - s0 = range_check_value(x0 + x3, bit + stage_range[5]); - s1 = range_check_value(x1, bit + stage_range[5]); - s2 = range_check_value(x2 - x3, bit + stage_range[5]); - s3 = range_check_value(x2 - x0, bit + stage_range[5]); - - // stage 6 - s3 = range_check_value(s3 + x3, bit + stage_range[6]); - - // 1-D transform scaling factor is sqrt(2). 
- output[0] = round_shift(s0, bit); - output[1] = round_shift(s1, bit); - output[2] = round_shift(s2, bit); - output[3] = round_shift(s3, bit); - av1_range_check_buf(6, input, output, 4, stage_range[6]); -} - -void av1_fadst8_new(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range) { - const int32_t size = 8; - const int32_t *cospi; - - int32_t stage = 0; - int32_t *bf0, *bf1; - int32_t step[8]; - - // stage 0; - av1_range_check_buf(stage, input, input, size, stage_range[stage]); - - // stage 1; - stage++; - assert(output != input); - bf1 = output; - bf1[0] = input[0]; - bf1[1] = -input[7]; - bf1[2] = -input[3]; - bf1[3] = input[4]; - bf1[4] = -input[1]; - bf1[5] = input[6]; - bf1[6] = input[2]; - bf1[7] = -input[5]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 2 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = output; - bf1 = step; - bf1[0] = bf0[0]; - bf1[1] = bf0[1]; - bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit); - bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit); - bf1[4] = bf0[4]; - bf1[5] = bf0[5]; - bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit); - bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit); - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 3 - stage++; - bf0 = step; - bf1 = output; - bf1[0] = bf0[0] + bf0[2]; - bf1[1] = bf0[1] + bf0[3]; - bf1[2] = bf0[0] - bf0[2]; - bf1[3] = bf0[1] - bf0[3]; - bf1[4] = bf0[4] + bf0[6]; - bf1[5] = bf0[5] + bf0[7]; - bf1[6] = bf0[4] - bf0[6]; - bf1[7] = bf0[5] - bf0[7]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 4 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = output; - bf1 = step; - bf1[0] = bf0[0]; - bf1[1] = bf0[1]; - bf1[2] = bf0[2]; - bf1[3] = bf0[3]; - bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], bf0[5], cos_bit); - bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit); - bf1[6] = 
half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit); - bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit); - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 5 - stage++; - bf0 = step; - bf1 = output; - bf1[0] = bf0[0] + bf0[4]; - bf1[1] = bf0[1] + bf0[5]; - bf1[2] = bf0[2] + bf0[6]; - bf1[3] = bf0[3] + bf0[7]; - bf1[4] = bf0[0] - bf0[4]; - bf1[5] = bf0[1] - bf0[5]; - bf1[6] = bf0[2] - bf0[6]; - bf1[7] = bf0[3] - bf0[7]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 6 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = output; - bf1 = step; - bf1[0] = half_btf(cospi[4], bf0[0], cospi[60], bf0[1], cos_bit); - bf1[1] = half_btf(cospi[60], bf0[0], -cospi[4], bf0[1], cos_bit); - bf1[2] = half_btf(cospi[20], bf0[2], cospi[44], bf0[3], cos_bit); - bf1[3] = half_btf(cospi[44], bf0[2], -cospi[20], bf0[3], cos_bit); - bf1[4] = half_btf(cospi[36], bf0[4], cospi[28], bf0[5], cos_bit); - bf1[5] = half_btf(cospi[28], bf0[4], -cospi[36], bf0[5], cos_bit); - bf1[6] = half_btf(cospi[52], bf0[6], cospi[12], bf0[7], cos_bit); - bf1[7] = half_btf(cospi[12], bf0[6], -cospi[52], bf0[7], cos_bit); - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 7 - stage++; - bf0 = step; - bf1 = output; - bf1[0] = bf0[1]; - bf1[1] = bf0[6]; - bf1[2] = bf0[3]; - bf1[3] = bf0[4]; - bf1[4] = bf0[5]; - bf1[5] = bf0[2]; - bf1[6] = bf0[7]; - bf1[7] = bf0[0]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); -} - -void av1_fadst16_new(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range) { - const int32_t size = 16; - const int32_t *cospi; - - int32_t stage = 0; - int32_t *bf0, *bf1; - int32_t step[16]; - - // stage 0; - av1_range_check_buf(stage, input, input, size, stage_range[stage]); - - // stage 1; - stage++; - assert(output != input); - bf1 = output; - bf1[0] = input[0]; - bf1[1] = -input[15]; - bf1[2] = -input[7]; - bf1[3] = input[8]; - bf1[4] 
= -input[3]; - bf1[5] = input[12]; - bf1[6] = input[4]; - bf1[7] = -input[11]; - bf1[8] = -input[1]; - bf1[9] = input[14]; - bf1[10] = input[6]; - bf1[11] = -input[9]; - bf1[12] = input[2]; - bf1[13] = -input[13]; - bf1[14] = -input[5]; - bf1[15] = input[10]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 2 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = output; - bf1 = step; - bf1[0] = bf0[0]; - bf1[1] = bf0[1]; - bf1[2] = half_btf(cospi[32], bf0[2], cospi[32], bf0[3], cos_bit); - bf1[3] = half_btf(cospi[32], bf0[2], -cospi[32], bf0[3], cos_bit); - bf1[4] = bf0[4]; - bf1[5] = bf0[5]; - bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[7], cos_bit); - bf1[7] = half_btf(cospi[32], bf0[6], -cospi[32], bf0[7], cos_bit); - bf1[8] = bf0[8]; - bf1[9] = bf0[9]; - bf1[10] = half_btf(cospi[32], bf0[10], cospi[32], bf0[11], cos_bit); - bf1[11] = half_btf(cospi[32], bf0[10], -cospi[32], bf0[11], cos_bit); - bf1[12] = bf0[12]; - bf1[13] = bf0[13]; - bf1[14] = half_btf(cospi[32], bf0[14], cospi[32], bf0[15], cos_bit); - bf1[15] = half_btf(cospi[32], bf0[14], -cospi[32], bf0[15], cos_bit); - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 3 - stage++; - bf0 = step; - bf1 = output; - bf1[0] = bf0[0] + bf0[2]; - bf1[1] = bf0[1] + bf0[3]; - bf1[2] = bf0[0] - bf0[2]; - bf1[3] = bf0[1] - bf0[3]; - bf1[4] = bf0[4] + bf0[6]; - bf1[5] = bf0[5] + bf0[7]; - bf1[6] = bf0[4] - bf0[6]; - bf1[7] = bf0[5] - bf0[7]; - bf1[8] = bf0[8] + bf0[10]; - bf1[9] = bf0[9] + bf0[11]; - bf1[10] = bf0[8] - bf0[10]; - bf1[11] = bf0[9] - bf0[11]; - bf1[12] = bf0[12] + bf0[14]; - bf1[13] = bf0[13] + bf0[15]; - bf1[14] = bf0[12] - bf0[14]; - bf1[15] = bf0[13] - bf0[15]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 4 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = output; - bf1 = step; - bf1[0] = bf0[0]; - bf1[1] = bf0[1]; - bf1[2] = bf0[2]; - bf1[3] = bf0[3]; - bf1[4] = half_btf(cospi[16], bf0[4], cospi[48], 
bf0[5], cos_bit); - bf1[5] = half_btf(cospi[48], bf0[4], -cospi[16], bf0[5], cos_bit); - bf1[6] = half_btf(-cospi[48], bf0[6], cospi[16], bf0[7], cos_bit); - bf1[7] = half_btf(cospi[16], bf0[6], cospi[48], bf0[7], cos_bit); - bf1[8] = bf0[8]; - bf1[9] = bf0[9]; - bf1[10] = bf0[10]; - bf1[11] = bf0[11]; - bf1[12] = half_btf(cospi[16], bf0[12], cospi[48], bf0[13], cos_bit); - bf1[13] = half_btf(cospi[48], bf0[12], -cospi[16], bf0[13], cos_bit); - bf1[14] = half_btf(-cospi[48], bf0[14], cospi[16], bf0[15], cos_bit); - bf1[15] = half_btf(cospi[16], bf0[14], cospi[48], bf0[15], cos_bit); - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 5 - stage++; - bf0 = step; - bf1 = output; - bf1[0] = bf0[0] + bf0[4]; - bf1[1] = bf0[1] + bf0[5]; - bf1[2] = bf0[2] + bf0[6]; - bf1[3] = bf0[3] + bf0[7]; - bf1[4] = bf0[0] - bf0[4]; - bf1[5] = bf0[1] - bf0[5]; - bf1[6] = bf0[2] - bf0[6]; - bf1[7] = bf0[3] - bf0[7]; - bf1[8] = bf0[8] + bf0[12]; - bf1[9] = bf0[9] + bf0[13]; - bf1[10] = bf0[10] + bf0[14]; - bf1[11] = bf0[11] + bf0[15]; - bf1[12] = bf0[8] - bf0[12]; - bf1[13] = bf0[9] - bf0[13]; - bf1[14] = bf0[10] - bf0[14]; - bf1[15] = bf0[11] - bf0[15]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 6 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = output; - bf1 = step; - bf1[0] = bf0[0]; - bf1[1] = bf0[1]; - bf1[2] = bf0[2]; - bf1[3] = bf0[3]; - bf1[4] = bf0[4]; - bf1[5] = bf0[5]; - bf1[6] = bf0[6]; - bf1[7] = bf0[7]; - bf1[8] = half_btf(cospi[8], bf0[8], cospi[56], bf0[9], cos_bit); - bf1[9] = half_btf(cospi[56], bf0[8], -cospi[8], bf0[9], cos_bit); - bf1[10] = half_btf(cospi[40], bf0[10], cospi[24], bf0[11], cos_bit); - bf1[11] = half_btf(cospi[24], bf0[10], -cospi[40], bf0[11], cos_bit); - bf1[12] = half_btf(-cospi[56], bf0[12], cospi[8], bf0[13], cos_bit); - bf1[13] = half_btf(cospi[8], bf0[12], cospi[56], bf0[13], cos_bit); - bf1[14] = half_btf(-cospi[24], bf0[14], cospi[40], bf0[15], cos_bit); - bf1[15] = 
half_btf(cospi[40], bf0[14], cospi[24], bf0[15], cos_bit); - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 7 - stage++; - bf0 = step; - bf1 = output; - bf1[0] = bf0[0] + bf0[8]; - bf1[1] = bf0[1] + bf0[9]; - bf1[2] = bf0[2] + bf0[10]; - bf1[3] = bf0[3] + bf0[11]; - bf1[4] = bf0[4] + bf0[12]; - bf1[5] = bf0[5] + bf0[13]; - bf1[6] = bf0[6] + bf0[14]; - bf1[7] = bf0[7] + bf0[15]; - bf1[8] = bf0[0] - bf0[8]; - bf1[9] = bf0[1] - bf0[9]; - bf1[10] = bf0[2] - bf0[10]; - bf1[11] = bf0[3] - bf0[11]; - bf1[12] = bf0[4] - bf0[12]; - bf1[13] = bf0[5] - bf0[13]; - bf1[14] = bf0[6] - bf0[14]; - bf1[15] = bf0[7] - bf0[15]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 8 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = output; - bf1 = step; - bf1[0] = half_btf(cospi[2], bf0[0], cospi[62], bf0[1], cos_bit); - bf1[1] = half_btf(cospi[62], bf0[0], -cospi[2], bf0[1], cos_bit); - bf1[2] = half_btf(cospi[10], bf0[2], cospi[54], bf0[3], cos_bit); - bf1[3] = half_btf(cospi[54], bf0[2], -cospi[10], bf0[3], cos_bit); - bf1[4] = half_btf(cospi[18], bf0[4], cospi[46], bf0[5], cos_bit); - bf1[5] = half_btf(cospi[46], bf0[4], -cospi[18], bf0[5], cos_bit); - bf1[6] = half_btf(cospi[26], bf0[6], cospi[38], bf0[7], cos_bit); - bf1[7] = half_btf(cospi[38], bf0[6], -cospi[26], bf0[7], cos_bit); - bf1[8] = half_btf(cospi[34], bf0[8], cospi[30], bf0[9], cos_bit); - bf1[9] = half_btf(cospi[30], bf0[8], -cospi[34], bf0[9], cos_bit); - bf1[10] = half_btf(cospi[42], bf0[10], cospi[22], bf0[11], cos_bit); - bf1[11] = half_btf(cospi[22], bf0[10], -cospi[42], bf0[11], cos_bit); - bf1[12] = half_btf(cospi[50], bf0[12], cospi[14], bf0[13], cos_bit); - bf1[13] = half_btf(cospi[14], bf0[12], -cospi[50], bf0[13], cos_bit); - bf1[14] = half_btf(cospi[58], bf0[14], cospi[6], bf0[15], cos_bit); - bf1[15] = half_btf(cospi[6], bf0[14], -cospi[58], bf0[15], cos_bit); - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 9 - 
stage++; - bf0 = step; - bf1 = output; - bf1[0] = bf0[1]; - bf1[1] = bf0[14]; - bf1[2] = bf0[3]; - bf1[3] = bf0[12]; - bf1[4] = bf0[5]; - bf1[5] = bf0[10]; - bf1[6] = bf0[7]; - bf1[7] = bf0[8]; - bf1[8] = bf0[9]; - bf1[9] = bf0[6]; - bf1[10] = bf0[11]; - bf1[11] = bf0[4]; - bf1[12] = bf0[13]; - bf1[13] = bf0[2]; - bf1[14] = bf0[15]; - bf1[15] = bf0[0]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); -} - -void av1_fidentity4_c(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range) { - (void)cos_bit; - for (int i = 0; i < 4; ++i) - output[i] = round_shift((int64_t)input[i] * NewSqrt2, NewSqrt2Bits); - assert(stage_range[0] + NewSqrt2Bits <= 32); - av1_range_check_buf(0, input, output, 4, stage_range[0]); -} - -void av1_fidentity8_c(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range) { - (void)cos_bit; - for (int i = 0; i < 8; ++i) output[i] = input[i] * 2; - av1_range_check_buf(0, input, output, 8, stage_range[0]); -} - -void av1_fidentity16_c(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range) { - (void)cos_bit; - for (int i = 0; i < 16; ++i) - output[i] = round_shift((int64_t)input[i] * 2 * NewSqrt2, NewSqrt2Bits); - assert(stage_range[0] + NewSqrt2Bits <= 32); - av1_range_check_buf(0, input, output, 16, stage_range[0]); -} - -void av1_fidentity32_c(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range) { - (void)cos_bit; - for (int i = 0; i < 32; ++i) output[i] = input[i] * 4; - av1_range_check_buf(0, input, output, 32, stage_range[0]); -} - -void av1_fdct64_new(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range) { - const int32_t size = 64; - const int32_t *cospi; - - int32_t stage = 0; - int32_t *bf0, *bf1; - int32_t step[64]; - - // stage 0; - av1_range_check_buf(stage, input, input, size, stage_range[stage]); - - // stage 1; - stage++; - bf1 = output; - bf1[0] = input[0] + input[63]; - 
bf1[1] = input[1] + input[62]; - bf1[2] = input[2] + input[61]; - bf1[3] = input[3] + input[60]; - bf1[4] = input[4] + input[59]; - bf1[5] = input[5] + input[58]; - bf1[6] = input[6] + input[57]; - bf1[7] = input[7] + input[56]; - bf1[8] = input[8] + input[55]; - bf1[9] = input[9] + input[54]; - bf1[10] = input[10] + input[53]; - bf1[11] = input[11] + input[52]; - bf1[12] = input[12] + input[51]; - bf1[13] = input[13] + input[50]; - bf1[14] = input[14] + input[49]; - bf1[15] = input[15] + input[48]; - bf1[16] = input[16] + input[47]; - bf1[17] = input[17] + input[46]; - bf1[18] = input[18] + input[45]; - bf1[19] = input[19] + input[44]; - bf1[20] = input[20] + input[43]; - bf1[21] = input[21] + input[42]; - bf1[22] = input[22] + input[41]; - bf1[23] = input[23] + input[40]; - bf1[24] = input[24] + input[39]; - bf1[25] = input[25] + input[38]; - bf1[26] = input[26] + input[37]; - bf1[27] = input[27] + input[36]; - bf1[28] = input[28] + input[35]; - bf1[29] = input[29] + input[34]; - bf1[30] = input[30] + input[33]; - bf1[31] = input[31] + input[32]; - bf1[32] = -input[32] + input[31]; - bf1[33] = -input[33] + input[30]; - bf1[34] = -input[34] + input[29]; - bf1[35] = -input[35] + input[28]; - bf1[36] = -input[36] + input[27]; - bf1[37] = -input[37] + input[26]; - bf1[38] = -input[38] + input[25]; - bf1[39] = -input[39] + input[24]; - bf1[40] = -input[40] + input[23]; - bf1[41] = -input[41] + input[22]; - bf1[42] = -input[42] + input[21]; - bf1[43] = -input[43] + input[20]; - bf1[44] = -input[44] + input[19]; - bf1[45] = -input[45] + input[18]; - bf1[46] = -input[46] + input[17]; - bf1[47] = -input[47] + input[16]; - bf1[48] = -input[48] + input[15]; - bf1[49] = -input[49] + input[14]; - bf1[50] = -input[50] + input[13]; - bf1[51] = -input[51] + input[12]; - bf1[52] = -input[52] + input[11]; - bf1[53] = -input[53] + input[10]; - bf1[54] = -input[54] + input[9]; - bf1[55] = -input[55] + input[8]; - bf1[56] = -input[56] + input[7]; - bf1[57] = -input[57] + input[6]; - 
bf1[58] = -input[58] + input[5]; - bf1[59] = -input[59] + input[4]; - bf1[60] = -input[60] + input[3]; - bf1[61] = -input[61] + input[2]; - bf1[62] = -input[62] + input[1]; - bf1[63] = -input[63] + input[0]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 2 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = output; - bf1 = step; - bf1[0] = bf0[0] + bf0[31]; - bf1[1] = bf0[1] + bf0[30]; - bf1[2] = bf0[2] + bf0[29]; - bf1[3] = bf0[3] + bf0[28]; - bf1[4] = bf0[4] + bf0[27]; - bf1[5] = bf0[5] + bf0[26]; - bf1[6] = bf0[6] + bf0[25]; - bf1[7] = bf0[7] + bf0[24]; - bf1[8] = bf0[8] + bf0[23]; - bf1[9] = bf0[9] + bf0[22]; - bf1[10] = bf0[10] + bf0[21]; - bf1[11] = bf0[11] + bf0[20]; - bf1[12] = bf0[12] + bf0[19]; - bf1[13] = bf0[13] + bf0[18]; - bf1[14] = bf0[14] + bf0[17]; - bf1[15] = bf0[15] + bf0[16]; - bf1[16] = -bf0[16] + bf0[15]; - bf1[17] = -bf0[17] + bf0[14]; - bf1[18] = -bf0[18] + bf0[13]; - bf1[19] = -bf0[19] + bf0[12]; - bf1[20] = -bf0[20] + bf0[11]; - bf1[21] = -bf0[21] + bf0[10]; - bf1[22] = -bf0[22] + bf0[9]; - bf1[23] = -bf0[23] + bf0[8]; - bf1[24] = -bf0[24] + bf0[7]; - bf1[25] = -bf0[25] + bf0[6]; - bf1[26] = -bf0[26] + bf0[5]; - bf1[27] = -bf0[27] + bf0[4]; - bf1[28] = -bf0[28] + bf0[3]; - bf1[29] = -bf0[29] + bf0[2]; - bf1[30] = -bf0[30] + bf0[1]; - bf1[31] = -bf0[31] + bf0[0]; - bf1[32] = bf0[32]; - bf1[33] = bf0[33]; - bf1[34] = bf0[34]; - bf1[35] = bf0[35]; - bf1[36] = bf0[36]; - bf1[37] = bf0[37]; - bf1[38] = bf0[38]; - bf1[39] = bf0[39]; - bf1[40] = half_btf(-cospi[32], bf0[40], cospi[32], bf0[55], cos_bit); - bf1[41] = half_btf(-cospi[32], bf0[41], cospi[32], bf0[54], cos_bit); - bf1[42] = half_btf(-cospi[32], bf0[42], cospi[32], bf0[53], cos_bit); - bf1[43] = half_btf(-cospi[32], bf0[43], cospi[32], bf0[52], cos_bit); - bf1[44] = half_btf(-cospi[32], bf0[44], cospi[32], bf0[51], cos_bit); - bf1[45] = half_btf(-cospi[32], bf0[45], cospi[32], bf0[50], cos_bit); - bf1[46] = half_btf(-cospi[32], bf0[46], cospi[32], 
bf0[49], cos_bit); - bf1[47] = half_btf(-cospi[32], bf0[47], cospi[32], bf0[48], cos_bit); - bf1[48] = half_btf(cospi[32], bf0[48], cospi[32], bf0[47], cos_bit); - bf1[49] = half_btf(cospi[32], bf0[49], cospi[32], bf0[46], cos_bit); - bf1[50] = half_btf(cospi[32], bf0[50], cospi[32], bf0[45], cos_bit); - bf1[51] = half_btf(cospi[32], bf0[51], cospi[32], bf0[44], cos_bit); - bf1[52] = half_btf(cospi[32], bf0[52], cospi[32], bf0[43], cos_bit); - bf1[53] = half_btf(cospi[32], bf0[53], cospi[32], bf0[42], cos_bit); - bf1[54] = half_btf(cospi[32], bf0[54], cospi[32], bf0[41], cos_bit); - bf1[55] = half_btf(cospi[32], bf0[55], cospi[32], bf0[40], cos_bit); - bf1[56] = bf0[56]; - bf1[57] = bf0[57]; - bf1[58] = bf0[58]; - bf1[59] = bf0[59]; - bf1[60] = bf0[60]; - bf1[61] = bf0[61]; - bf1[62] = bf0[62]; - bf1[63] = bf0[63]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 3 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = step; - bf1 = output; - bf1[0] = bf0[0] + bf0[15]; - bf1[1] = bf0[1] + bf0[14]; - bf1[2] = bf0[2] + bf0[13]; - bf1[3] = bf0[3] + bf0[12]; - bf1[4] = bf0[4] + bf0[11]; - bf1[5] = bf0[5] + bf0[10]; - bf1[6] = bf0[6] + bf0[9]; - bf1[7] = bf0[7] + bf0[8]; - bf1[8] = -bf0[8] + bf0[7]; - bf1[9] = -bf0[9] + bf0[6]; - bf1[10] = -bf0[10] + bf0[5]; - bf1[11] = -bf0[11] + bf0[4]; - bf1[12] = -bf0[12] + bf0[3]; - bf1[13] = -bf0[13] + bf0[2]; - bf1[14] = -bf0[14] + bf0[1]; - bf1[15] = -bf0[15] + bf0[0]; - bf1[16] = bf0[16]; - bf1[17] = bf0[17]; - bf1[18] = bf0[18]; - bf1[19] = bf0[19]; - bf1[20] = half_btf(-cospi[32], bf0[20], cospi[32], bf0[27], cos_bit); - bf1[21] = half_btf(-cospi[32], bf0[21], cospi[32], bf0[26], cos_bit); - bf1[22] = half_btf(-cospi[32], bf0[22], cospi[32], bf0[25], cos_bit); - bf1[23] = half_btf(-cospi[32], bf0[23], cospi[32], bf0[24], cos_bit); - bf1[24] = half_btf(cospi[32], bf0[24], cospi[32], bf0[23], cos_bit); - bf1[25] = half_btf(cospi[32], bf0[25], cospi[32], bf0[22], cos_bit); - bf1[26] = 
half_btf(cospi[32], bf0[26], cospi[32], bf0[21], cos_bit); - bf1[27] = half_btf(cospi[32], bf0[27], cospi[32], bf0[20], cos_bit); - bf1[28] = bf0[28]; - bf1[29] = bf0[29]; - bf1[30] = bf0[30]; - bf1[31] = bf0[31]; - bf1[32] = bf0[32] + bf0[47]; - bf1[33] = bf0[33] + bf0[46]; - bf1[34] = bf0[34] + bf0[45]; - bf1[35] = bf0[35] + bf0[44]; - bf1[36] = bf0[36] + bf0[43]; - bf1[37] = bf0[37] + bf0[42]; - bf1[38] = bf0[38] + bf0[41]; - bf1[39] = bf0[39] + bf0[40]; - bf1[40] = -bf0[40] + bf0[39]; - bf1[41] = -bf0[41] + bf0[38]; - bf1[42] = -bf0[42] + bf0[37]; - bf1[43] = -bf0[43] + bf0[36]; - bf1[44] = -bf0[44] + bf0[35]; - bf1[45] = -bf0[45] + bf0[34]; - bf1[46] = -bf0[46] + bf0[33]; - bf1[47] = -bf0[47] + bf0[32]; - bf1[48] = -bf0[48] + bf0[63]; - bf1[49] = -bf0[49] + bf0[62]; - bf1[50] = -bf0[50] + bf0[61]; - bf1[51] = -bf0[51] + bf0[60]; - bf1[52] = -bf0[52] + bf0[59]; - bf1[53] = -bf0[53] + bf0[58]; - bf1[54] = -bf0[54] + bf0[57]; - bf1[55] = -bf0[55] + bf0[56]; - bf1[56] = bf0[56] + bf0[55]; - bf1[57] = bf0[57] + bf0[54]; - bf1[58] = bf0[58] + bf0[53]; - bf1[59] = bf0[59] + bf0[52]; - bf1[60] = bf0[60] + bf0[51]; - bf1[61] = bf0[61] + bf0[50]; - bf1[62] = bf0[62] + bf0[49]; - bf1[63] = bf0[63] + bf0[48]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 4 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = output; - bf1 = step; - bf1[0] = bf0[0] + bf0[7]; - bf1[1] = bf0[1] + bf0[6]; - bf1[2] = bf0[2] + bf0[5]; - bf1[3] = bf0[3] + bf0[4]; - bf1[4] = -bf0[4] + bf0[3]; - bf1[5] = -bf0[5] + bf0[2]; - bf1[6] = -bf0[6] + bf0[1]; - bf1[7] = -bf0[7] + bf0[0]; - bf1[8] = bf0[8]; - bf1[9] = bf0[9]; - bf1[10] = half_btf(-cospi[32], bf0[10], cospi[32], bf0[13], cos_bit); - bf1[11] = half_btf(-cospi[32], bf0[11], cospi[32], bf0[12], cos_bit); - bf1[12] = half_btf(cospi[32], bf0[12], cospi[32], bf0[11], cos_bit); - bf1[13] = half_btf(cospi[32], bf0[13], cospi[32], bf0[10], cos_bit); - bf1[14] = bf0[14]; - bf1[15] = bf0[15]; - bf1[16] = bf0[16] + 
bf0[23]; - bf1[17] = bf0[17] + bf0[22]; - bf1[18] = bf0[18] + bf0[21]; - bf1[19] = bf0[19] + bf0[20]; - bf1[20] = -bf0[20] + bf0[19]; - bf1[21] = -bf0[21] + bf0[18]; - bf1[22] = -bf0[22] + bf0[17]; - bf1[23] = -bf0[23] + bf0[16]; - bf1[24] = -bf0[24] + bf0[31]; - bf1[25] = -bf0[25] + bf0[30]; - bf1[26] = -bf0[26] + bf0[29]; - bf1[27] = -bf0[27] + bf0[28]; - bf1[28] = bf0[28] + bf0[27]; - bf1[29] = bf0[29] + bf0[26]; - bf1[30] = bf0[30] + bf0[25]; - bf1[31] = bf0[31] + bf0[24]; - bf1[32] = bf0[32]; - bf1[33] = bf0[33]; - bf1[34] = bf0[34]; - bf1[35] = bf0[35]; - bf1[36] = half_btf(-cospi[16], bf0[36], cospi[48], bf0[59], cos_bit); - bf1[37] = half_btf(-cospi[16], bf0[37], cospi[48], bf0[58], cos_bit); - bf1[38] = half_btf(-cospi[16], bf0[38], cospi[48], bf0[57], cos_bit); - bf1[39] = half_btf(-cospi[16], bf0[39], cospi[48], bf0[56], cos_bit); - bf1[40] = half_btf(-cospi[48], bf0[40], -cospi[16], bf0[55], cos_bit); - bf1[41] = half_btf(-cospi[48], bf0[41], -cospi[16], bf0[54], cos_bit); - bf1[42] = half_btf(-cospi[48], bf0[42], -cospi[16], bf0[53], cos_bit); - bf1[43] = half_btf(-cospi[48], bf0[43], -cospi[16], bf0[52], cos_bit); - bf1[44] = bf0[44]; - bf1[45] = bf0[45]; - bf1[46] = bf0[46]; - bf1[47] = bf0[47]; - bf1[48] = bf0[48]; - bf1[49] = bf0[49]; - bf1[50] = bf0[50]; - bf1[51] = bf0[51]; - bf1[52] = half_btf(cospi[48], bf0[52], -cospi[16], bf0[43], cos_bit); - bf1[53] = half_btf(cospi[48], bf0[53], -cospi[16], bf0[42], cos_bit); - bf1[54] = half_btf(cospi[48], bf0[54], -cospi[16], bf0[41], cos_bit); - bf1[55] = half_btf(cospi[48], bf0[55], -cospi[16], bf0[40], cos_bit); - bf1[56] = half_btf(cospi[16], bf0[56], cospi[48], bf0[39], cos_bit); - bf1[57] = half_btf(cospi[16], bf0[57], cospi[48], bf0[38], cos_bit); - bf1[58] = half_btf(cospi[16], bf0[58], cospi[48], bf0[37], cos_bit); - bf1[59] = half_btf(cospi[16], bf0[59], cospi[48], bf0[36], cos_bit); - bf1[60] = bf0[60]; - bf1[61] = bf0[61]; - bf1[62] = bf0[62]; - bf1[63] = bf0[63]; - av1_range_check_buf(stage, 
input, bf1, size, stage_range[stage]); - - // stage 5 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = step; - bf1 = output; - bf1[0] = bf0[0] + bf0[3]; - bf1[1] = bf0[1] + bf0[2]; - bf1[2] = -bf0[2] + bf0[1]; - bf1[3] = -bf0[3] + bf0[0]; - bf1[4] = bf0[4]; - bf1[5] = half_btf(-cospi[32], bf0[5], cospi[32], bf0[6], cos_bit); - bf1[6] = half_btf(cospi[32], bf0[6], cospi[32], bf0[5], cos_bit); - bf1[7] = bf0[7]; - bf1[8] = bf0[8] + bf0[11]; - bf1[9] = bf0[9] + bf0[10]; - bf1[10] = -bf0[10] + bf0[9]; - bf1[11] = -bf0[11] + bf0[8]; - bf1[12] = -bf0[12] + bf0[15]; - bf1[13] = -bf0[13] + bf0[14]; - bf1[14] = bf0[14] + bf0[13]; - bf1[15] = bf0[15] + bf0[12]; - bf1[16] = bf0[16]; - bf1[17] = bf0[17]; - bf1[18] = half_btf(-cospi[16], bf0[18], cospi[48], bf0[29], cos_bit); - bf1[19] = half_btf(-cospi[16], bf0[19], cospi[48], bf0[28], cos_bit); - bf1[20] = half_btf(-cospi[48], bf0[20], -cospi[16], bf0[27], cos_bit); - bf1[21] = half_btf(-cospi[48], bf0[21], -cospi[16], bf0[26], cos_bit); - bf1[22] = bf0[22]; - bf1[23] = bf0[23]; - bf1[24] = bf0[24]; - bf1[25] = bf0[25]; - bf1[26] = half_btf(cospi[48], bf0[26], -cospi[16], bf0[21], cos_bit); - bf1[27] = half_btf(cospi[48], bf0[27], -cospi[16], bf0[20], cos_bit); - bf1[28] = half_btf(cospi[16], bf0[28], cospi[48], bf0[19], cos_bit); - bf1[29] = half_btf(cospi[16], bf0[29], cospi[48], bf0[18], cos_bit); - bf1[30] = bf0[30]; - bf1[31] = bf0[31]; - bf1[32] = bf0[32] + bf0[39]; - bf1[33] = bf0[33] + bf0[38]; - bf1[34] = bf0[34] + bf0[37]; - bf1[35] = bf0[35] + bf0[36]; - bf1[36] = -bf0[36] + bf0[35]; - bf1[37] = -bf0[37] + bf0[34]; - bf1[38] = -bf0[38] + bf0[33]; - bf1[39] = -bf0[39] + bf0[32]; - bf1[40] = -bf0[40] + bf0[47]; - bf1[41] = -bf0[41] + bf0[46]; - bf1[42] = -bf0[42] + bf0[45]; - bf1[43] = -bf0[43] + bf0[44]; - bf1[44] = bf0[44] + bf0[43]; - bf1[45] = bf0[45] + bf0[42]; - bf1[46] = bf0[46] + bf0[41]; - bf1[47] = bf0[47] + bf0[40]; - bf1[48] = bf0[48] + bf0[55]; - bf1[49] = bf0[49] + bf0[54]; - bf1[50] = bf0[50] + 
bf0[53]; - bf1[51] = bf0[51] + bf0[52]; - bf1[52] = -bf0[52] + bf0[51]; - bf1[53] = -bf0[53] + bf0[50]; - bf1[54] = -bf0[54] + bf0[49]; - bf1[55] = -bf0[55] + bf0[48]; - bf1[56] = -bf0[56] + bf0[63]; - bf1[57] = -bf0[57] + bf0[62]; - bf1[58] = -bf0[58] + bf0[61]; - bf1[59] = -bf0[59] + bf0[60]; - bf1[60] = bf0[60] + bf0[59]; - bf1[61] = bf0[61] + bf0[58]; - bf1[62] = bf0[62] + bf0[57]; - bf1[63] = bf0[63] + bf0[56]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 6 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = output; - bf1 = step; - bf1[0] = half_btf(cospi[32], bf0[0], cospi[32], bf0[1], cos_bit); - bf1[1] = half_btf(-cospi[32], bf0[1], cospi[32], bf0[0], cos_bit); - bf1[2] = half_btf(cospi[48], bf0[2], cospi[16], bf0[3], cos_bit); - bf1[3] = half_btf(cospi[48], bf0[3], -cospi[16], bf0[2], cos_bit); - bf1[4] = bf0[4] + bf0[5]; - bf1[5] = -bf0[5] + bf0[4]; - bf1[6] = -bf0[6] + bf0[7]; - bf1[7] = bf0[7] + bf0[6]; - bf1[8] = bf0[8]; - bf1[9] = half_btf(-cospi[16], bf0[9], cospi[48], bf0[14], cos_bit); - bf1[10] = half_btf(-cospi[48], bf0[10], -cospi[16], bf0[13], cos_bit); - bf1[11] = bf0[11]; - bf1[12] = bf0[12]; - bf1[13] = half_btf(cospi[48], bf0[13], -cospi[16], bf0[10], cos_bit); - bf1[14] = half_btf(cospi[16], bf0[14], cospi[48], bf0[9], cos_bit); - bf1[15] = bf0[15]; - bf1[16] = bf0[16] + bf0[19]; - bf1[17] = bf0[17] + bf0[18]; - bf1[18] = -bf0[18] + bf0[17]; - bf1[19] = -bf0[19] + bf0[16]; - bf1[20] = -bf0[20] + bf0[23]; - bf1[21] = -bf0[21] + bf0[22]; - bf1[22] = bf0[22] + bf0[21]; - bf1[23] = bf0[23] + bf0[20]; - bf1[24] = bf0[24] + bf0[27]; - bf1[25] = bf0[25] + bf0[26]; - bf1[26] = -bf0[26] + bf0[25]; - bf1[27] = -bf0[27] + bf0[24]; - bf1[28] = -bf0[28] + bf0[31]; - bf1[29] = -bf0[29] + bf0[30]; - bf1[30] = bf0[30] + bf0[29]; - bf1[31] = bf0[31] + bf0[28]; - bf1[32] = bf0[32]; - bf1[33] = bf0[33]; - bf1[34] = half_btf(-cospi[8], bf0[34], cospi[56], bf0[61], cos_bit); - bf1[35] = half_btf(-cospi[8], bf0[35], cospi[56], 
bf0[60], cos_bit); - bf1[36] = half_btf(-cospi[56], bf0[36], -cospi[8], bf0[59], cos_bit); - bf1[37] = half_btf(-cospi[56], bf0[37], -cospi[8], bf0[58], cos_bit); - bf1[38] = bf0[38]; - bf1[39] = bf0[39]; - bf1[40] = bf0[40]; - bf1[41] = bf0[41]; - bf1[42] = half_btf(-cospi[40], bf0[42], cospi[24], bf0[53], cos_bit); - bf1[43] = half_btf(-cospi[40], bf0[43], cospi[24], bf0[52], cos_bit); - bf1[44] = half_btf(-cospi[24], bf0[44], -cospi[40], bf0[51], cos_bit); - bf1[45] = half_btf(-cospi[24], bf0[45], -cospi[40], bf0[50], cos_bit); - bf1[46] = bf0[46]; - bf1[47] = bf0[47]; - bf1[48] = bf0[48]; - bf1[49] = bf0[49]; - bf1[50] = half_btf(cospi[24], bf0[50], -cospi[40], bf0[45], cos_bit); - bf1[51] = half_btf(cospi[24], bf0[51], -cospi[40], bf0[44], cos_bit); - bf1[52] = half_btf(cospi[40], bf0[52], cospi[24], bf0[43], cos_bit); - bf1[53] = half_btf(cospi[40], bf0[53], cospi[24], bf0[42], cos_bit); - bf1[54] = bf0[54]; - bf1[55] = bf0[55]; - bf1[56] = bf0[56]; - bf1[57] = bf0[57]; - bf1[58] = half_btf(cospi[56], bf0[58], -cospi[8], bf0[37], cos_bit); - bf1[59] = half_btf(cospi[56], bf0[59], -cospi[8], bf0[36], cos_bit); - bf1[60] = half_btf(cospi[8], bf0[60], cospi[56], bf0[35], cos_bit); - bf1[61] = half_btf(cospi[8], bf0[61], cospi[56], bf0[34], cos_bit); - bf1[62] = bf0[62]; - bf1[63] = bf0[63]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 7 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = step; - bf1 = output; - bf1[0] = bf0[0]; - bf1[1] = bf0[1]; - bf1[2] = bf0[2]; - bf1[3] = bf0[3]; - bf1[4] = half_btf(cospi[56], bf0[4], cospi[8], bf0[7], cos_bit); - bf1[5] = half_btf(cospi[24], bf0[5], cospi[40], bf0[6], cos_bit); - bf1[6] = half_btf(cospi[24], bf0[6], -cospi[40], bf0[5], cos_bit); - bf1[7] = half_btf(cospi[56], bf0[7], -cospi[8], bf0[4], cos_bit); - bf1[8] = bf0[8] + bf0[9]; - bf1[9] = -bf0[9] + bf0[8]; - bf1[10] = -bf0[10] + bf0[11]; - bf1[11] = bf0[11] + bf0[10]; - bf1[12] = bf0[12] + bf0[13]; - bf1[13] = -bf0[13] + bf0[12]; 
- bf1[14] = -bf0[14] + bf0[15]; - bf1[15] = bf0[15] + bf0[14]; - bf1[16] = bf0[16]; - bf1[17] = half_btf(-cospi[8], bf0[17], cospi[56], bf0[30], cos_bit); - bf1[18] = half_btf(-cospi[56], bf0[18], -cospi[8], bf0[29], cos_bit); - bf1[19] = bf0[19]; - bf1[20] = bf0[20]; - bf1[21] = half_btf(-cospi[40], bf0[21], cospi[24], bf0[26], cos_bit); - bf1[22] = half_btf(-cospi[24], bf0[22], -cospi[40], bf0[25], cos_bit); - bf1[23] = bf0[23]; - bf1[24] = bf0[24]; - bf1[25] = half_btf(cospi[24], bf0[25], -cospi[40], bf0[22], cos_bit); - bf1[26] = half_btf(cospi[40], bf0[26], cospi[24], bf0[21], cos_bit); - bf1[27] = bf0[27]; - bf1[28] = bf0[28]; - bf1[29] = half_btf(cospi[56], bf0[29], -cospi[8], bf0[18], cos_bit); - bf1[30] = half_btf(cospi[8], bf0[30], cospi[56], bf0[17], cos_bit); - bf1[31] = bf0[31]; - bf1[32] = bf0[32] + bf0[35]; - bf1[33] = bf0[33] + bf0[34]; - bf1[34] = -bf0[34] + bf0[33]; - bf1[35] = -bf0[35] + bf0[32]; - bf1[36] = -bf0[36] + bf0[39]; - bf1[37] = -bf0[37] + bf0[38]; - bf1[38] = bf0[38] + bf0[37]; - bf1[39] = bf0[39] + bf0[36]; - bf1[40] = bf0[40] + bf0[43]; - bf1[41] = bf0[41] + bf0[42]; - bf1[42] = -bf0[42] + bf0[41]; - bf1[43] = -bf0[43] + bf0[40]; - bf1[44] = -bf0[44] + bf0[47]; - bf1[45] = -bf0[45] + bf0[46]; - bf1[46] = bf0[46] + bf0[45]; - bf1[47] = bf0[47] + bf0[44]; - bf1[48] = bf0[48] + bf0[51]; - bf1[49] = bf0[49] + bf0[50]; - bf1[50] = -bf0[50] + bf0[49]; - bf1[51] = -bf0[51] + bf0[48]; - bf1[52] = -bf0[52] + bf0[55]; - bf1[53] = -bf0[53] + bf0[54]; - bf1[54] = bf0[54] + bf0[53]; - bf1[55] = bf0[55] + bf0[52]; - bf1[56] = bf0[56] + bf0[59]; - bf1[57] = bf0[57] + bf0[58]; - bf1[58] = -bf0[58] + bf0[57]; - bf1[59] = -bf0[59] + bf0[56]; - bf1[60] = -bf0[60] + bf0[63]; - bf1[61] = -bf0[61] + bf0[62]; - bf1[62] = bf0[62] + bf0[61]; - bf1[63] = bf0[63] + bf0[60]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 8 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = output; - bf1 = step; - bf1[0] = bf0[0]; - bf1[1] = 
bf0[1]; - bf1[2] = bf0[2]; - bf1[3] = bf0[3]; - bf1[4] = bf0[4]; - bf1[5] = bf0[5]; - bf1[6] = bf0[6]; - bf1[7] = bf0[7]; - bf1[8] = half_btf(cospi[60], bf0[8], cospi[4], bf0[15], cos_bit); - bf1[9] = half_btf(cospi[28], bf0[9], cospi[36], bf0[14], cos_bit); - bf1[10] = half_btf(cospi[44], bf0[10], cospi[20], bf0[13], cos_bit); - bf1[11] = half_btf(cospi[12], bf0[11], cospi[52], bf0[12], cos_bit); - bf1[12] = half_btf(cospi[12], bf0[12], -cospi[52], bf0[11], cos_bit); - bf1[13] = half_btf(cospi[44], bf0[13], -cospi[20], bf0[10], cos_bit); - bf1[14] = half_btf(cospi[28], bf0[14], -cospi[36], bf0[9], cos_bit); - bf1[15] = half_btf(cospi[60], bf0[15], -cospi[4], bf0[8], cos_bit); - bf1[16] = bf0[16] + bf0[17]; - bf1[17] = -bf0[17] + bf0[16]; - bf1[18] = -bf0[18] + bf0[19]; - bf1[19] = bf0[19] + bf0[18]; - bf1[20] = bf0[20] + bf0[21]; - bf1[21] = -bf0[21] + bf0[20]; - bf1[22] = -bf0[22] + bf0[23]; - bf1[23] = bf0[23] + bf0[22]; - bf1[24] = bf0[24] + bf0[25]; - bf1[25] = -bf0[25] + bf0[24]; - bf1[26] = -bf0[26] + bf0[27]; - bf1[27] = bf0[27] + bf0[26]; - bf1[28] = bf0[28] + bf0[29]; - bf1[29] = -bf0[29] + bf0[28]; - bf1[30] = -bf0[30] + bf0[31]; - bf1[31] = bf0[31] + bf0[30]; - bf1[32] = bf0[32]; - bf1[33] = half_btf(-cospi[4], bf0[33], cospi[60], bf0[62], cos_bit); - bf1[34] = half_btf(-cospi[60], bf0[34], -cospi[4], bf0[61], cos_bit); - bf1[35] = bf0[35]; - bf1[36] = bf0[36]; - bf1[37] = half_btf(-cospi[36], bf0[37], cospi[28], bf0[58], cos_bit); - bf1[38] = half_btf(-cospi[28], bf0[38], -cospi[36], bf0[57], cos_bit); - bf1[39] = bf0[39]; - bf1[40] = bf0[40]; - bf1[41] = half_btf(-cospi[20], bf0[41], cospi[44], bf0[54], cos_bit); - bf1[42] = half_btf(-cospi[44], bf0[42], -cospi[20], bf0[53], cos_bit); - bf1[43] = bf0[43]; - bf1[44] = bf0[44]; - bf1[45] = half_btf(-cospi[52], bf0[45], cospi[12], bf0[50], cos_bit); - bf1[46] = half_btf(-cospi[12], bf0[46], -cospi[52], bf0[49], cos_bit); - bf1[47] = bf0[47]; - bf1[48] = bf0[48]; - bf1[49] = half_btf(cospi[12], bf0[49], 
-cospi[52], bf0[46], cos_bit); - bf1[50] = half_btf(cospi[52], bf0[50], cospi[12], bf0[45], cos_bit); - bf1[51] = bf0[51]; - bf1[52] = bf0[52]; - bf1[53] = half_btf(cospi[44], bf0[53], -cospi[20], bf0[42], cos_bit); - bf1[54] = half_btf(cospi[20], bf0[54], cospi[44], bf0[41], cos_bit); - bf1[55] = bf0[55]; - bf1[56] = bf0[56]; - bf1[57] = half_btf(cospi[28], bf0[57], -cospi[36], bf0[38], cos_bit); - bf1[58] = half_btf(cospi[36], bf0[58], cospi[28], bf0[37], cos_bit); - bf1[59] = bf0[59]; - bf1[60] = bf0[60]; - bf1[61] = half_btf(cospi[60], bf0[61], -cospi[4], bf0[34], cos_bit); - bf1[62] = half_btf(cospi[4], bf0[62], cospi[60], bf0[33], cos_bit); - bf1[63] = bf0[63]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 9 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = step; - bf1 = output; - bf1[0] = bf0[0]; - bf1[1] = bf0[1]; - bf1[2] = bf0[2]; - bf1[3] = bf0[3]; - bf1[4] = bf0[4]; - bf1[5] = bf0[5]; - bf1[6] = bf0[6]; - bf1[7] = bf0[7]; - bf1[8] = bf0[8]; - bf1[9] = bf0[9]; - bf1[10] = bf0[10]; - bf1[11] = bf0[11]; - bf1[12] = bf0[12]; - bf1[13] = bf0[13]; - bf1[14] = bf0[14]; - bf1[15] = bf0[15]; - bf1[16] = half_btf(cospi[62], bf0[16], cospi[2], bf0[31], cos_bit); - bf1[17] = half_btf(cospi[30], bf0[17], cospi[34], bf0[30], cos_bit); - bf1[18] = half_btf(cospi[46], bf0[18], cospi[18], bf0[29], cos_bit); - bf1[19] = half_btf(cospi[14], bf0[19], cospi[50], bf0[28], cos_bit); - bf1[20] = half_btf(cospi[54], bf0[20], cospi[10], bf0[27], cos_bit); - bf1[21] = half_btf(cospi[22], bf0[21], cospi[42], bf0[26], cos_bit); - bf1[22] = half_btf(cospi[38], bf0[22], cospi[26], bf0[25], cos_bit); - bf1[23] = half_btf(cospi[6], bf0[23], cospi[58], bf0[24], cos_bit); - bf1[24] = half_btf(cospi[6], bf0[24], -cospi[58], bf0[23], cos_bit); - bf1[25] = half_btf(cospi[38], bf0[25], -cospi[26], bf0[22], cos_bit); - bf1[26] = half_btf(cospi[22], bf0[26], -cospi[42], bf0[21], cos_bit); - bf1[27] = half_btf(cospi[54], bf0[27], -cospi[10], bf0[20], cos_bit); 
- bf1[28] = half_btf(cospi[14], bf0[28], -cospi[50], bf0[19], cos_bit); - bf1[29] = half_btf(cospi[46], bf0[29], -cospi[18], bf0[18], cos_bit); - bf1[30] = half_btf(cospi[30], bf0[30], -cospi[34], bf0[17], cos_bit); - bf1[31] = half_btf(cospi[62], bf0[31], -cospi[2], bf0[16], cos_bit); - bf1[32] = bf0[32] + bf0[33]; - bf1[33] = -bf0[33] + bf0[32]; - bf1[34] = -bf0[34] + bf0[35]; - bf1[35] = bf0[35] + bf0[34]; - bf1[36] = bf0[36] + bf0[37]; - bf1[37] = -bf0[37] + bf0[36]; - bf1[38] = -bf0[38] + bf0[39]; - bf1[39] = bf0[39] + bf0[38]; - bf1[40] = bf0[40] + bf0[41]; - bf1[41] = -bf0[41] + bf0[40]; - bf1[42] = -bf0[42] + bf0[43]; - bf1[43] = bf0[43] + bf0[42]; - bf1[44] = bf0[44] + bf0[45]; - bf1[45] = -bf0[45] + bf0[44]; - bf1[46] = -bf0[46] + bf0[47]; - bf1[47] = bf0[47] + bf0[46]; - bf1[48] = bf0[48] + bf0[49]; - bf1[49] = -bf0[49] + bf0[48]; - bf1[50] = -bf0[50] + bf0[51]; - bf1[51] = bf0[51] + bf0[50]; - bf1[52] = bf0[52] + bf0[53]; - bf1[53] = -bf0[53] + bf0[52]; - bf1[54] = -bf0[54] + bf0[55]; - bf1[55] = bf0[55] + bf0[54]; - bf1[56] = bf0[56] + bf0[57]; - bf1[57] = -bf0[57] + bf0[56]; - bf1[58] = -bf0[58] + bf0[59]; - bf1[59] = bf0[59] + bf0[58]; - bf1[60] = bf0[60] + bf0[61]; - bf1[61] = -bf0[61] + bf0[60]; - bf1[62] = -bf0[62] + bf0[63]; - bf1[63] = bf0[63] + bf0[62]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 10 - stage++; - cospi = cospi_arr(cos_bit); - bf0 = output; - bf1 = step; - bf1[0] = bf0[0]; - bf1[1] = bf0[1]; - bf1[2] = bf0[2]; - bf1[3] = bf0[3]; - bf1[4] = bf0[4]; - bf1[5] = bf0[5]; - bf1[6] = bf0[6]; - bf1[7] = bf0[7]; - bf1[8] = bf0[8]; - bf1[9] = bf0[9]; - bf1[10] = bf0[10]; - bf1[11] = bf0[11]; - bf1[12] = bf0[12]; - bf1[13] = bf0[13]; - bf1[14] = bf0[14]; - bf1[15] = bf0[15]; - bf1[16] = bf0[16]; - bf1[17] = bf0[17]; - bf1[18] = bf0[18]; - bf1[19] = bf0[19]; - bf1[20] = bf0[20]; - bf1[21] = bf0[21]; - bf1[22] = bf0[22]; - bf1[23] = bf0[23]; - bf1[24] = bf0[24]; - bf1[25] = bf0[25]; - bf1[26] = bf0[26]; - 
bf1[27] = bf0[27]; - bf1[28] = bf0[28]; - bf1[29] = bf0[29]; - bf1[30] = bf0[30]; - bf1[31] = bf0[31]; - bf1[32] = half_btf(cospi[63], bf0[32], cospi[1], bf0[63], cos_bit); - bf1[33] = half_btf(cospi[31], bf0[33], cospi[33], bf0[62], cos_bit); - bf1[34] = half_btf(cospi[47], bf0[34], cospi[17], bf0[61], cos_bit); - bf1[35] = half_btf(cospi[15], bf0[35], cospi[49], bf0[60], cos_bit); - bf1[36] = half_btf(cospi[55], bf0[36], cospi[9], bf0[59], cos_bit); - bf1[37] = half_btf(cospi[23], bf0[37], cospi[41], bf0[58], cos_bit); - bf1[38] = half_btf(cospi[39], bf0[38], cospi[25], bf0[57], cos_bit); - bf1[39] = half_btf(cospi[7], bf0[39], cospi[57], bf0[56], cos_bit); - bf1[40] = half_btf(cospi[59], bf0[40], cospi[5], bf0[55], cos_bit); - bf1[41] = half_btf(cospi[27], bf0[41], cospi[37], bf0[54], cos_bit); - bf1[42] = half_btf(cospi[43], bf0[42], cospi[21], bf0[53], cos_bit); - bf1[43] = half_btf(cospi[11], bf0[43], cospi[53], bf0[52], cos_bit); - bf1[44] = half_btf(cospi[51], bf0[44], cospi[13], bf0[51], cos_bit); - bf1[45] = half_btf(cospi[19], bf0[45], cospi[45], bf0[50], cos_bit); - bf1[46] = half_btf(cospi[35], bf0[46], cospi[29], bf0[49], cos_bit); - bf1[47] = half_btf(cospi[3], bf0[47], cospi[61], bf0[48], cos_bit); - bf1[48] = half_btf(cospi[3], bf0[48], -cospi[61], bf0[47], cos_bit); - bf1[49] = half_btf(cospi[35], bf0[49], -cospi[29], bf0[46], cos_bit); - bf1[50] = half_btf(cospi[19], bf0[50], -cospi[45], bf0[45], cos_bit); - bf1[51] = half_btf(cospi[51], bf0[51], -cospi[13], bf0[44], cos_bit); - bf1[52] = half_btf(cospi[11], bf0[52], -cospi[53], bf0[43], cos_bit); - bf1[53] = half_btf(cospi[43], bf0[53], -cospi[21], bf0[42], cos_bit); - bf1[54] = half_btf(cospi[27], bf0[54], -cospi[37], bf0[41], cos_bit); - bf1[55] = half_btf(cospi[59], bf0[55], -cospi[5], bf0[40], cos_bit); - bf1[56] = half_btf(cospi[7], bf0[56], -cospi[57], bf0[39], cos_bit); - bf1[57] = half_btf(cospi[39], bf0[57], -cospi[25], bf0[38], cos_bit); - bf1[58] = half_btf(cospi[23], bf0[58], 
-cospi[41], bf0[37], cos_bit); - bf1[59] = half_btf(cospi[55], bf0[59], -cospi[9], bf0[36], cos_bit); - bf1[60] = half_btf(cospi[15], bf0[60], -cospi[49], bf0[35], cos_bit); - bf1[61] = half_btf(cospi[47], bf0[61], -cospi[17], bf0[34], cos_bit); - bf1[62] = half_btf(cospi[31], bf0[62], -cospi[33], bf0[33], cos_bit); - bf1[63] = half_btf(cospi[63], bf0[63], -cospi[1], bf0[32], cos_bit); - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); - - // stage 11 - stage++; - bf0 = step; - bf1 = output; - bf1[0] = bf0[0]; - bf1[1] = bf0[32]; - bf1[2] = bf0[16]; - bf1[3] = bf0[48]; - bf1[4] = bf0[8]; - bf1[5] = bf0[40]; - bf1[6] = bf0[24]; - bf1[7] = bf0[56]; - bf1[8] = bf0[4]; - bf1[9] = bf0[36]; - bf1[10] = bf0[20]; - bf1[11] = bf0[52]; - bf1[12] = bf0[12]; - bf1[13] = bf0[44]; - bf1[14] = bf0[28]; - bf1[15] = bf0[60]; - bf1[16] = bf0[2]; - bf1[17] = bf0[34]; - bf1[18] = bf0[18]; - bf1[19] = bf0[50]; - bf1[20] = bf0[10]; - bf1[21] = bf0[42]; - bf1[22] = bf0[26]; - bf1[23] = bf0[58]; - bf1[24] = bf0[6]; - bf1[25] = bf0[38]; - bf1[26] = bf0[22]; - bf1[27] = bf0[54]; - bf1[28] = bf0[14]; - bf1[29] = bf0[46]; - bf1[30] = bf0[30]; - bf1[31] = bf0[62]; - bf1[32] = bf0[1]; - bf1[33] = bf0[33]; - bf1[34] = bf0[17]; - bf1[35] = bf0[49]; - bf1[36] = bf0[9]; - bf1[37] = bf0[41]; - bf1[38] = bf0[25]; - bf1[39] = bf0[57]; - bf1[40] = bf0[5]; - bf1[41] = bf0[37]; - bf1[42] = bf0[21]; - bf1[43] = bf0[53]; - bf1[44] = bf0[13]; - bf1[45] = bf0[45]; - bf1[46] = bf0[29]; - bf1[47] = bf0[61]; - bf1[48] = bf0[3]; - bf1[49] = bf0[35]; - bf1[50] = bf0[19]; - bf1[51] = bf0[51]; - bf1[52] = bf0[11]; - bf1[53] = bf0[43]; - bf1[54] = bf0[27]; - bf1[55] = bf0[59]; - bf1[56] = bf0[7]; - bf1[57] = bf0[39]; - bf1[58] = bf0[23]; - bf1[59] = bf0[55]; - bf1[60] = bf0[15]; - bf1[61] = bf0[47]; - bf1[62] = bf0[31]; - bf1[63] = bf0[63]; - av1_range_check_buf(stage, input, bf1, size, stage_range[stage]); -} diff --git a/third_party/aom/av1/encoder/av1_fwd_txfm1d.h 
b/third_party/aom/av1/encoder/av1_fwd_txfm1d.h deleted file mode 100644 index 9dcf16552..000000000 --- a/third_party/aom/av1/encoder/av1_fwd_txfm1d.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_ENCODER_AV1_FWD_TXFM1D_H_ -#define AOM_AV1_ENCODER_AV1_FWD_TXFM1D_H_ - -#include "av1/common/av1_txfm.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void av1_fdct4_new(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range); -void av1_fdct8_new(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range); -void av1_fdct16_new(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range); -void av1_fdct32_new(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range); -void av1_fdct64_new(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range); -void av1_fadst4_new(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range); -void av1_fadst8_new(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range); -void av1_fadst16_new(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range); -void av1_fidentity4_c(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range); -void av1_fidentity8_c(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range); -void av1_fidentity16_c(const int32_t *input, 
int32_t *output, int8_t cos_bit, - const int8_t *stage_range); -void av1_fidentity32_c(const int32_t *input, int32_t *output, int8_t cos_bit, - const int8_t *stage_range); -#ifdef __cplusplus -} -#endif - -#endif // AOM_AV1_ENCODER_AV1_FWD_TXFM1D_H_ diff --git a/third_party/aom/av1/encoder/av1_fwd_txfm1d_cfg.h b/third_party/aom/av1/encoder/av1_fwd_txfm1d_cfg.h deleted file mode 100644 index 98b6530db..000000000 --- a/third_party/aom/av1/encoder/av1_fwd_txfm1d_cfg.h +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_ENCODER_AV1_FWD_TXFM1D_CFG_H_ -#define AOM_AV1_ENCODER_AV1_FWD_TXFM1D_CFG_H_ -#include "av1/common/enums.h" -#include "av1/encoder/av1_fwd_txfm1d.h" -extern const int8_t *fwd_txfm_shift_ls[TX_SIZES_ALL]; -extern const int8_t fwd_cos_bit_col[5][5]; -extern const int8_t fwd_cos_bit_row[5][5]; -#endif // AOM_AV1_ENCODER_AV1_FWD_TXFM1D_CFG_H_ diff --git a/third_party/aom/av1/encoder/av1_fwd_txfm2d.c b/third_party/aom/av1/encoder/av1_fwd_txfm2d.c deleted file mode 100644 index f25a667cf..000000000 --- a/third_party/aom/av1/encoder/av1_fwd_txfm2d.c +++ /dev/null @@ -1,431 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include - -#include "config/aom_dsp_rtcd.h" -#include "config/av1_rtcd.h" - -#include "aom_dsp/txfm_common.h" -#include "av1/common/enums.h" -#include "av1/common/av1_txfm.h" -#include "av1/encoder/av1_fwd_txfm1d.h" -#include "av1/encoder/av1_fwd_txfm1d_cfg.h" - -static INLINE TxfmFunc fwd_txfm_type_to_func(TXFM_TYPE txfm_type) { - switch (txfm_type) { - case TXFM_TYPE_DCT4: return av1_fdct4_new; - case TXFM_TYPE_DCT8: return av1_fdct8_new; - case TXFM_TYPE_DCT16: return av1_fdct16_new; - case TXFM_TYPE_DCT32: return av1_fdct32_new; - case TXFM_TYPE_DCT64: return av1_fdct64_new; - case TXFM_TYPE_ADST4: return av1_fadst4_new; - case TXFM_TYPE_ADST8: return av1_fadst8_new; - case TXFM_TYPE_ADST16: return av1_fadst16_new; - case TXFM_TYPE_IDENTITY4: return av1_fidentity4_c; - case TXFM_TYPE_IDENTITY8: return av1_fidentity8_c; - case TXFM_TYPE_IDENTITY16: return av1_fidentity16_c; - case TXFM_TYPE_IDENTITY32: return av1_fidentity32_c; - default: assert(0); return NULL; - } -} - -void av1_gen_fwd_stage_range(int8_t *stage_range_col, int8_t *stage_range_row, - const TXFM_2D_FLIP_CFG *cfg, int bd) { - // Take the shift from the larger dimension in the rectangular case. 
- const int8_t *shift = cfg->shift; - // i < MAX_TXFM_STAGE_NUM will mute above array bounds warning - for (int i = 0; i < cfg->stage_num_col && i < MAX_TXFM_STAGE_NUM; ++i) { - stage_range_col[i] = cfg->stage_range_col[i] + shift[0] + bd + 1; - } - - // i < MAX_TXFM_STAGE_NUM will mute above array bounds warning - for (int i = 0; i < cfg->stage_num_row && i < MAX_TXFM_STAGE_NUM; ++i) { - stage_range_row[i] = cfg->stage_range_row[i] + shift[0] + shift[1] + bd + 1; - } -} - -static INLINE void fwd_txfm2d_c(const int16_t *input, int32_t *output, - const int stride, const TXFM_2D_FLIP_CFG *cfg, - int32_t *buf, int bd) { - int c, r; - // Note when assigning txfm_size_col, we use the txfm_size from the - // row configuration and vice versa. This is intentionally done to - // accurately perform rectangular transforms. When the transform is - // rectangular, the number of columns will be the same as the - // txfm_size stored in the row cfg struct. It will make no difference - // for square transforms. - const int txfm_size_col = tx_size_wide[cfg->tx_size]; - const int txfm_size_row = tx_size_high[cfg->tx_size]; - // Take the shift from the larger dimension in the rectangular case. 
- const int8_t *shift = cfg->shift; - const int rect_type = get_rect_tx_log_ratio(txfm_size_col, txfm_size_row); - int8_t stage_range_col[MAX_TXFM_STAGE_NUM]; - int8_t stage_range_row[MAX_TXFM_STAGE_NUM]; - assert(cfg->stage_num_col <= MAX_TXFM_STAGE_NUM); - assert(cfg->stage_num_row <= MAX_TXFM_STAGE_NUM); - av1_gen_fwd_stage_range(stage_range_col, stage_range_row, cfg, bd); - - const int8_t cos_bit_col = cfg->cos_bit_col; - const int8_t cos_bit_row = cfg->cos_bit_row; - const TxfmFunc txfm_func_col = fwd_txfm_type_to_func(cfg->txfm_type_col); - const TxfmFunc txfm_func_row = fwd_txfm_type_to_func(cfg->txfm_type_row); - - // use output buffer as temp buffer - int32_t *temp_in = output; - int32_t *temp_out = output + txfm_size_row; - - // Columns - for (c = 0; c < txfm_size_col; ++c) { - if (cfg->ud_flip == 0) { - for (r = 0; r < txfm_size_row; ++r) temp_in[r] = input[r * stride + c]; - } else { - for (r = 0; r < txfm_size_row; ++r) - // flip upside down - temp_in[r] = input[(txfm_size_row - r - 1) * stride + c]; - } - av1_round_shift_array(temp_in, txfm_size_row, -shift[0]); - txfm_func_col(temp_in, temp_out, cos_bit_col, stage_range_col); - av1_round_shift_array(temp_out, txfm_size_row, -shift[1]); - if (cfg->lr_flip == 0) { - for (r = 0; r < txfm_size_row; ++r) - buf[r * txfm_size_col + c] = temp_out[r]; - } else { - for (r = 0; r < txfm_size_row; ++r) - // flip from left to right - buf[r * txfm_size_col + (txfm_size_col - c - 1)] = temp_out[r]; - } - } - - // Rows - for (r = 0; r < txfm_size_row; ++r) { - txfm_func_row(buf + r * txfm_size_col, output + r * txfm_size_col, - cos_bit_row, stage_range_row); - av1_round_shift_array(output + r * txfm_size_col, txfm_size_col, -shift[2]); - if (abs(rect_type) == 1) { - // Multiply everything by Sqrt2 if the transform is rectangular and the - // size difference is a factor of 2. 
- for (c = 0; c < txfm_size_col; ++c) { - output[r * txfm_size_col + c] = round_shift( - (int64_t)output[r * txfm_size_col + c] * NewSqrt2, NewSqrt2Bits); - } - } - } -} - -void av1_fwd_txfm2d_4x8_c(const int16_t *input, int32_t *output, int stride, - TX_TYPE tx_type, int bd) { - DECLARE_ALIGNED(32, int32_t, txfm_buf[4 * 8]); - TXFM_2D_FLIP_CFG cfg; - av1_get_fwd_txfm_cfg(tx_type, TX_4X8, &cfg); - fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); -} - -void av1_fwd_txfm2d_8x4_c(const int16_t *input, int32_t *output, int stride, - TX_TYPE tx_type, int bd) { - int32_t txfm_buf[8 * 4]; - TXFM_2D_FLIP_CFG cfg; - av1_get_fwd_txfm_cfg(tx_type, TX_8X4, &cfg); - fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); -} - -void av1_fwd_txfm2d_8x16_c(const int16_t *input, int32_t *output, int stride, - TX_TYPE tx_type, int bd) { - DECLARE_ALIGNED(32, int32_t, txfm_buf[8 * 16]); - TXFM_2D_FLIP_CFG cfg; - av1_get_fwd_txfm_cfg(tx_type, TX_8X16, &cfg); - fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); -} - -void av1_fwd_txfm2d_16x8_c(const int16_t *input, int32_t *output, int stride, - TX_TYPE tx_type, int bd) { - int32_t txfm_buf[16 * 8]; - TXFM_2D_FLIP_CFG cfg; - av1_get_fwd_txfm_cfg(tx_type, TX_16X8, &cfg); - fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); -} - -void av1_fwd_txfm2d_16x32_c(const int16_t *input, int32_t *output, int stride, - TX_TYPE tx_type, int bd) { - DECLARE_ALIGNED(32, int32_t, txfm_buf[16 * 32]); - TXFM_2D_FLIP_CFG cfg; - av1_get_fwd_txfm_cfg(tx_type, TX_16X32, &cfg); - fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); -} - -void av1_fwd_txfm2d_32x16_c(const int16_t *input, int32_t *output, int stride, - TX_TYPE tx_type, int bd) { - int32_t txfm_buf[32 * 16]; - TXFM_2D_FLIP_CFG cfg; - av1_get_fwd_txfm_cfg(tx_type, TX_32X16, &cfg); - fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); -} - -void av1_fwd_txfm2d_4x16_c(const int16_t *input, int32_t *output, int stride, - TX_TYPE tx_type, int bd) { - DECLARE_ALIGNED(32, 
int32_t, txfm_buf[4 * 16]); - TXFM_2D_FLIP_CFG cfg; - av1_get_fwd_txfm_cfg(tx_type, TX_4X16, &cfg); - fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); -} - -void av1_fwd_txfm2d_16x4_c(const int16_t *input, int32_t *output, int stride, - TX_TYPE tx_type, int bd) { - int32_t txfm_buf[16 * 4]; - TXFM_2D_FLIP_CFG cfg; - av1_get_fwd_txfm_cfg(tx_type, TX_16X4, &cfg); - fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); -} - -void av1_fwd_txfm2d_8x32_c(const int16_t *input, int32_t *output, int stride, - TX_TYPE tx_type, int bd) { - DECLARE_ALIGNED(32, int32_t, txfm_buf[32 * 8]); - TXFM_2D_FLIP_CFG cfg; - av1_get_fwd_txfm_cfg(tx_type, TX_8X32, &cfg); - fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); -} - -void av1_fwd_txfm2d_32x8_c(const int16_t *input, int32_t *output, int stride, - TX_TYPE tx_type, int bd) { - int32_t txfm_buf[32 * 8]; - TXFM_2D_FLIP_CFG cfg; - av1_get_fwd_txfm_cfg(tx_type, TX_32X8, &cfg); - fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); -} - -void av1_fwd_txfm2d_4x4_c(const int16_t *input, int32_t *output, int stride, - TX_TYPE tx_type, int bd) { - int32_t txfm_buf[4 * 4]; - TXFM_2D_FLIP_CFG cfg; - av1_get_fwd_txfm_cfg(tx_type, TX_4X4, &cfg); - fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); -} - -void av1_fwd_txfm2d_8x8_c(const int16_t *input, int32_t *output, int stride, - TX_TYPE tx_type, int bd) { - int32_t txfm_buf[8 * 8]; - TXFM_2D_FLIP_CFG cfg; - av1_get_fwd_txfm_cfg(tx_type, TX_8X8, &cfg); - fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); -} - -void av1_fwd_txfm2d_16x16_c(const int16_t *input, int32_t *output, int stride, - TX_TYPE tx_type, int bd) { - int32_t txfm_buf[16 * 16]; - TXFM_2D_FLIP_CFG cfg; - av1_get_fwd_txfm_cfg(tx_type, TX_16X16, &cfg); - fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); -} - -void av1_fwd_txfm2d_32x32_c(const int16_t *input, int32_t *output, int stride, - TX_TYPE tx_type, int bd) { - int32_t txfm_buf[32 * 32]; - TXFM_2D_FLIP_CFG cfg; - 
av1_get_fwd_txfm_cfg(tx_type, TX_32X32, &cfg); - fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); -} - -void av1_fwd_txfm2d_64x64_c(const int16_t *input, int32_t *output, int stride, - TX_TYPE tx_type, int bd) { - int32_t txfm_buf[64 * 64]; - TXFM_2D_FLIP_CFG cfg; - av1_get_fwd_txfm_cfg(tx_type, TX_64X64, &cfg); - fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); - - // Zero out top-right 32x32 area. - for (int row = 0; row < 32; ++row) { - memset(output + row * 64 + 32, 0, 32 * sizeof(*output)); - } - // Zero out the bottom 64x32 area. - memset(output + 32 * 64, 0, 32 * 64 * sizeof(*output)); - // Re-pack non-zero coeffs in the first 32x32 indices. - for (int row = 1; row < 32; ++row) { - memcpy(output + row * 32, output + row * 64, 32 * sizeof(*output)); - } -} - -void av1_fwd_txfm2d_32x64_c(const int16_t *input, int32_t *output, int stride, - TX_TYPE tx_type, int bd) { - DECLARE_ALIGNED(32, int32_t, txfm_buf[32 * 64]); - TXFM_2D_FLIP_CFG cfg; - av1_get_fwd_txfm_cfg(tx_type, TX_32X64, &cfg); - fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); - // Zero out the bottom 32x32 area. - memset(output + 32 * 32, 0, 32 * 32 * sizeof(*output)); - // Note: no repacking needed here. -} - -void av1_fwd_txfm2d_64x32_c(const int16_t *input, int32_t *output, int stride, - TX_TYPE tx_type, int bd) { - int32_t txfm_buf[64 * 32]; - TXFM_2D_FLIP_CFG cfg; - av1_get_fwd_txfm_cfg(tx_type, TX_64X32, &cfg); - fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); - - // Zero out right 32x32 area. - for (int row = 0; row < 32; ++row) { - memset(output + row * 64 + 32, 0, 32 * sizeof(*output)); - } - // Re-pack non-zero coeffs in the first 32x32 indices. 
- for (int row = 1; row < 32; ++row) { - memcpy(output + row * 32, output + row * 64, 32 * sizeof(*output)); - } -} - -void av1_fwd_txfm2d_16x64_c(const int16_t *input, int32_t *output, int stride, - TX_TYPE tx_type, int bd) { - DECLARE_ALIGNED(32, int32_t, txfm_buf[64 * 16]); - TXFM_2D_FLIP_CFG cfg; - av1_get_fwd_txfm_cfg(tx_type, TX_16X64, &cfg); - fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); - // Zero out the bottom 16x32 area. - memset(output + 16 * 32, 0, 16 * 32 * sizeof(*output)); - // Note: no repacking needed here. -} - -void av1_fwd_txfm2d_64x16_c(const int16_t *input, int32_t *output, int stride, - TX_TYPE tx_type, int bd) { - int32_t txfm_buf[64 * 16]; - TXFM_2D_FLIP_CFG cfg; - av1_get_fwd_txfm_cfg(tx_type, TX_64X16, &cfg); - fwd_txfm2d_c(input, output, stride, &cfg, txfm_buf, bd); - // Zero out right 32x16 area. - for (int row = 0; row < 16; ++row) { - memset(output + row * 64 + 32, 0, 32 * sizeof(*output)); - } - // Re-pack non-zero coeffs in the first 32x16 indices. 
- for (int row = 1; row < 16; ++row) { - memcpy(output + row * 32, output + row * 64, 32 * sizeof(*output)); - } -} - -static const int8_t fwd_shift_4x4[3] = { 2, 0, 0 }; -static const int8_t fwd_shift_8x8[3] = { 2, -1, 0 }; -static const int8_t fwd_shift_16x16[3] = { 2, -2, 0 }; -static const int8_t fwd_shift_32x32[3] = { 2, -4, 0 }; -static const int8_t fwd_shift_64x64[3] = { 0, -2, -2 }; -static const int8_t fwd_shift_4x8[3] = { 2, -1, 0 }; -static const int8_t fwd_shift_8x4[3] = { 2, -1, 0 }; -static const int8_t fwd_shift_8x16[3] = { 2, -2, 0 }; -static const int8_t fwd_shift_16x8[3] = { 2, -2, 0 }; -static const int8_t fwd_shift_16x32[3] = { 2, -4, 0 }; -static const int8_t fwd_shift_32x16[3] = { 2, -4, 0 }; -static const int8_t fwd_shift_32x64[3] = { 0, -2, -2 }; -static const int8_t fwd_shift_64x32[3] = { 2, -4, -2 }; -static const int8_t fwd_shift_4x16[3] = { 2, -1, 0 }; -static const int8_t fwd_shift_16x4[3] = { 2, -1, 0 }; -static const int8_t fwd_shift_8x32[3] = { 2, -2, 0 }; -static const int8_t fwd_shift_32x8[3] = { 2, -2, 0 }; -static const int8_t fwd_shift_16x64[3] = { 0, -2, 0 }; -static const int8_t fwd_shift_64x16[3] = { 2, -4, 0 }; - -const int8_t *fwd_txfm_shift_ls[TX_SIZES_ALL] = { - fwd_shift_4x4, fwd_shift_8x8, fwd_shift_16x16, fwd_shift_32x32, - fwd_shift_64x64, fwd_shift_4x8, fwd_shift_8x4, fwd_shift_8x16, - fwd_shift_16x8, fwd_shift_16x32, fwd_shift_32x16, fwd_shift_32x64, - fwd_shift_64x32, fwd_shift_4x16, fwd_shift_16x4, fwd_shift_8x32, - fwd_shift_32x8, fwd_shift_16x64, fwd_shift_64x16, -}; - -const int8_t fwd_cos_bit_col[MAX_TXWH_IDX /*txw_idx*/] - [MAX_TXWH_IDX /*txh_idx*/] = { - { 13, 13, 13, 0, 0 }, - { 13, 13, 13, 12, 0 }, - { 13, 13, 13, 12, 13 }, - { 0, 13, 13, 12, 13 }, - { 0, 0, 13, 12, 13 } - }; - -const int8_t fwd_cos_bit_row[MAX_TXWH_IDX /*txw_idx*/] - [MAX_TXWH_IDX /*txh_idx*/] = { - { 13, 13, 12, 0, 0 }, - { 13, 13, 13, 12, 0 }, - { 13, 13, 12, 13, 12 }, - { 0, 12, 13, 12, 11 }, - { 0, 0, 12, 11, 10 } - }; - -static const 
int8_t fdct4_range_mult2[4] = { 0, 2, 3, 3 }; -static const int8_t fdct8_range_mult2[6] = { 0, 2, 4, 5, 5, 5 }; -static const int8_t fdct16_range_mult2[8] = { 0, 2, 4, 6, 7, 7, 7, 7 }; -static const int8_t fdct32_range_mult2[10] = { 0, 2, 4, 6, 8, 9, 9, 9, 9, 9 }; -static const int8_t fdct64_range_mult2[12] = { 0, 2, 4, 6, 8, 10, - 11, 11, 11, 11, 11, 11 }; - -static const int8_t fadst4_range_mult2[7] = { 0, 2, 4, 3, 3, 3, 3 }; -static const int8_t fadst8_range_mult2[8] = { 0, 0, 1, 3, 3, 5, 5, 5 }; -static const int8_t fadst16_range_mult2[10] = { 0, 0, 1, 3, 3, 5, 5, 7, 7, 7 }; - -static const int8_t max_fwd_range_mult2_col[5] = { 3, 5, 7, 9, 11 }; - -static const int8_t fidtx4_range_mult2[1] = { 1 }; -static const int8_t fidtx8_range_mult2[1] = { 2 }; -static const int8_t fidtx16_range_mult2[1] = { 3 }; -static const int8_t fidtx32_range_mult2[1] = { 4 }; - -#if 0 -const int8_t fwd_idtx_range_row[MAX_TXWH_IDX /*txw_idx*/] - [MAX_TXWH_IDX /*txh_idx*/] = { { 2, 4, 5, 0, 0 }, - { 3, 4, 5, 6, 0 }, - { 4, 5, 6, 7, 8 }, - { 0, 5, 6, 7, 8 }, - { 0, 0, 7, 8, - 9 } }; -#endif - -const int8_t *fwd_txfm_range_mult2_list[TXFM_TYPES] = { - fdct4_range_mult2, fdct8_range_mult2, fdct16_range_mult2, - fdct32_range_mult2, fdct64_range_mult2, fadst4_range_mult2, - fadst8_range_mult2, fadst16_range_mult2, fidtx4_range_mult2, - fidtx8_range_mult2, fidtx16_range_mult2, fidtx32_range_mult2 -}; - -static INLINE void set_fwd_txfm_non_scale_range(TXFM_2D_FLIP_CFG *cfg) { - const int txh_idx = get_txh_idx(cfg->tx_size); - av1_zero(cfg->stage_range_col); - av1_zero(cfg->stage_range_row); - - if (cfg->txfm_type_col != TXFM_TYPE_INVALID) { - int stage_num_col = cfg->stage_num_col; - const int8_t *range_mult2_col = - fwd_txfm_range_mult2_list[cfg->txfm_type_col]; - for (int i = 0; i < stage_num_col; ++i) - cfg->stage_range_col[i] = (range_mult2_col[i] + 1) >> 1; - } - - if (cfg->txfm_type_row != TXFM_TYPE_INVALID) { - int stage_num_row = cfg->stage_num_row; - const int8_t *range_mult2_row = - 
fwd_txfm_range_mult2_list[cfg->txfm_type_row]; - for (int i = 0; i < stage_num_row; ++i) - cfg->stage_range_row[i] = - (max_fwd_range_mult2_col[txh_idx] + range_mult2_row[i] + 1) >> 1; - } -} - -void av1_get_fwd_txfm_cfg(TX_TYPE tx_type, TX_SIZE tx_size, - TXFM_2D_FLIP_CFG *cfg) { - assert(cfg != NULL); - cfg->tx_size = tx_size; - set_flip_cfg(tx_type, cfg); - const TX_TYPE_1D tx_type_1d_col = vtx_tab[tx_type]; - const TX_TYPE_1D tx_type_1d_row = htx_tab[tx_type]; - const int txw_idx = tx_size_wide_log2[tx_size] - tx_size_wide_log2[0]; - const int txh_idx = tx_size_high_log2[tx_size] - tx_size_high_log2[0]; - cfg->shift = fwd_txfm_shift_ls[tx_size]; - cfg->cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx]; - cfg->cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx]; - cfg->txfm_type_col = av1_txfm_type_ls[txh_idx][tx_type_1d_col]; - cfg->txfm_type_row = av1_txfm_type_ls[txw_idx][tx_type_1d_row]; - cfg->stage_num_col = av1_txfm_stage_num_list[cfg->txfm_type_col]; - cfg->stage_num_row = av1_txfm_stage_num_list[cfg->txfm_type_row]; - set_fwd_txfm_non_scale_range(cfg); -} diff --git a/third_party/aom/av1/encoder/av1_quantize.c b/third_party/aom/av1/encoder/av1_quantize.c deleted file mode 100644 index a0a926005..000000000 --- a/third_party/aom/av1/encoder/av1_quantize.c +++ /dev/null @@ -1,738 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include - -#include "config/aom_dsp_rtcd.h" - -#include "aom_dsp/quantize.h" -#include "aom_mem/aom_mem.h" -#include "aom_ports/mem.h" - -#include "av1/common/idct.h" -#include "av1/common/quant_common.h" -#include "av1/common/scan.h" -#include "av1/common/seg_common.h" - -#include "av1/encoder/av1_quantize.h" -#include "av1/encoder/encoder.h" -#include "av1/encoder/rd.h" - -void av1_quantize_skip(intptr_t n_coeffs, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr) { - memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); - *eob_ptr = 0; -} - -static void quantize_fp_helper_c( - const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, - const int16_t *round_ptr, const int16_t *quant_ptr, - const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr, - const qm_val_t *iqm_ptr, int log_scale) { - int i, eob = -1; - // TODO(jingning) Decide the need of these arguments after the - // quantization process is completed. 
- (void)zbin_ptr; - (void)quant_shift_ptr; - - memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); - - if (qm_ptr == NULL && iqm_ptr == NULL) { - const int rounding0 = ROUND_POWER_OF_TWO(round_ptr[0], log_scale); - { // rc == 0 - const int coeff = coeff_ptr[0]; - const int coeff_sign = (coeff >> 31); - int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - if ((abs_coeff << (1 + log_scale)) >= (int32_t)(dequant_ptr[0])) { - abs_coeff = clamp64(abs_coeff + rounding0, INT16_MIN, INT16_MAX); - const int tmp32 = (int)((abs_coeff * quant_ptr[0]) >> (16 - log_scale)); - if (tmp32) { - qcoeff_ptr[0] = (tmp32 ^ coeff_sign) - coeff_sign; - const tran_low_t abs_dqcoeff = (tmp32 * dequant_ptr[0]) >> log_scale; - dqcoeff_ptr[0] = (abs_dqcoeff ^ coeff_sign) - coeff_sign; - eob = 0; - } - } - } - const int rounding1 = ROUND_POWER_OF_TWO(round_ptr[1], log_scale); - const int32_t thresh1 = (int32_t)(dequant_ptr[1]); - for (i = 1; i < n_coeffs; i++) { - const int coeff = coeff_ptr[i]; - const int coeff_sign = (coeff >> 31); - int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - if ((abs_coeff << (1 + log_scale)) >= thresh1) { - abs_coeff = clamp64(abs_coeff + rounding1, INT16_MIN, INT16_MAX); - const int tmp32 = (int)((abs_coeff * quant_ptr[1]) >> (16 - log_scale)); - if (tmp32) { - qcoeff_ptr[i] = (tmp32 ^ coeff_sign) - coeff_sign; - const tran_low_t abs_dqcoeff = (tmp32 * dequant_ptr[1]) >> log_scale; - dqcoeff_ptr[i] = (abs_dqcoeff ^ coeff_sign) - coeff_sign; - eob = AOMMAX(iscan[i], eob); - } - } - } - } else { - // Quantization pass: All coefficients with index >= zero_flag are - // skippable. Note: zero_flag can be zero. - for (i = 0; i < n_coeffs; i++) { - const int rc = scan[i]; - const int coeff = coeff_ptr[rc]; - const qm_val_t wt = qm_ptr ? qm_ptr[rc] : (1 << AOM_QM_BITS); - const qm_val_t iwt = iqm_ptr ? 
iqm_ptr[rc] : (1 << AOM_QM_BITS); - const int dequant = - (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> - AOM_QM_BITS; - const int coeff_sign = (coeff >> 31); - int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - int tmp32 = 0; - if (abs_coeff * wt >= - (dequant_ptr[rc != 0] << (AOM_QM_BITS - (1 + log_scale)))) { - abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale); - abs_coeff = clamp64(abs_coeff, INT16_MIN, INT16_MAX); - tmp32 = (int)((abs_coeff * wt * quant_ptr[rc != 0]) >> - (16 - log_scale + AOM_QM_BITS)); - qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; - const tran_low_t abs_dqcoeff = (tmp32 * dequant) >> log_scale; - dqcoeff_ptr[rc] = (abs_dqcoeff ^ coeff_sign) - coeff_sign; - } - - if (tmp32) eob = i; - } - } - *eob_ptr = eob + 1; -} - -static void highbd_quantize_fp_helper_c( - const tran_low_t *coeff_ptr, intptr_t count, const int16_t *zbin_ptr, - const int16_t *round_ptr, const int16_t *quant_ptr, - const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan, const int16_t *iscan, const qm_val_t *qm_ptr, - const qm_val_t *iqm_ptr, int log_scale) { - int i; - int eob = -1; - const int shift = 16 - log_scale; - // TODO(jingning) Decide the need of these arguments after the - // quantization process is completed. - (void)zbin_ptr; - (void)quant_shift_ptr; - (void)iscan; - - if (qm_ptr || iqm_ptr) { - // Quantization pass: All coefficients with index >= zero_flag are - // skippable. Note: zero_flag can be zero. - for (i = 0; i < count; i++) { - const int rc = scan[i]; - const int coeff = coeff_ptr[rc]; - const qm_val_t wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS); - const qm_val_t iwt = iqm_ptr != NULL ? 
iqm_ptr[rc] : (1 << AOM_QM_BITS); - const int dequant = - (dequant_ptr[rc != 0] * iwt + (1 << (AOM_QM_BITS - 1))) >> - AOM_QM_BITS; - const int coeff_sign = (coeff >> 31); - const int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - int abs_qcoeff = 0; - if (abs_coeff * wt >= - (dequant_ptr[rc != 0] << (AOM_QM_BITS - (1 + log_scale)))) { - const int64_t tmp = - abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale); - abs_qcoeff = - (int)((tmp * quant_ptr[rc != 0] * wt) >> (shift + AOM_QM_BITS)); - qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); - const tran_low_t abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale; - dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign); - if (abs_qcoeff) eob = i; - } else { - qcoeff_ptr[rc] = 0; - dqcoeff_ptr[rc] = 0; - } - } - } else { - const int log_scaled_round_arr[2] = { - ROUND_POWER_OF_TWO(round_ptr[0], log_scale), - ROUND_POWER_OF_TWO(round_ptr[1], log_scale), - }; - for (i = 0; i < count; i++) { - const int rc = scan[i]; - const int coeff = coeff_ptr[rc]; - const int rc01 = (rc != 0); - const int coeff_sign = (coeff >> 31); - const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - const int log_scaled_round = log_scaled_round_arr[rc01]; - if ((abs_coeff << (1 + log_scale)) >= dequant_ptr[rc01]) { - const int quant = quant_ptr[rc01]; - const int dequant = dequant_ptr[rc01]; - const int64_t tmp = (int64_t)abs_coeff + log_scaled_round; - const int abs_qcoeff = (int)((tmp * quant) >> shift); - qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); - const tran_low_t abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale; - if (abs_qcoeff) eob = i; - dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign); - } else { - qcoeff_ptr[rc] = 0; - dqcoeff_ptr[rc] = 0; - } - } - } - *eob_ptr = eob + 1; -} - -void av1_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, - const int16_t *zbin_ptr, const int16_t *round_ptr, - const int16_t 
*quant_ptr, const int16_t *quant_shift_ptr, - tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, - const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan, const int16_t *iscan) { - quantize_fp_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, - quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, - eob_ptr, scan, iscan, NULL, NULL, 0); -} - -void av1_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, - const int16_t *zbin_ptr, const int16_t *round_ptr, - const int16_t *quant_ptr, - const int16_t *quant_shift_ptr, - tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, - const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan, const int16_t *iscan) { - quantize_fp_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, - quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, - eob_ptr, scan, iscan, NULL, NULL, 1); -} - -void av1_quantize_fp_64x64_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, - const int16_t *zbin_ptr, const int16_t *round_ptr, - const int16_t *quant_ptr, - const int16_t *quant_shift_ptr, - tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, - const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan, const int16_t *iscan) { - quantize_fp_helper_c(coeff_ptr, n_coeffs, zbin_ptr, round_ptr, quant_ptr, - quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, - eob_ptr, scan, iscan, NULL, NULL, 2); -} - -void av1_quantize_fp_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, - const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, - const SCAN_ORDER *sc, const QUANT_PARAM *qparam) { - const qm_val_t *qm_ptr = qparam->qmatrix; - const qm_val_t *iqm_ptr = qparam->iqmatrix; - if (qm_ptr != NULL && iqm_ptr != NULL) { - quantize_fp_helper_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX, - p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr, - dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan, - sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale); - } 
else { - switch (qparam->log_scale) { - case 0: - if (n_coeffs < 16) { - // TODO(jingning): Need SIMD implementation for smaller block size - // quantization. - quantize_fp_helper_c( - coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX, - p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, - p->dequant_QTX, eob_ptr, sc->scan, sc->iscan, NULL, NULL, 0); - } else { - av1_quantize_fp(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX, - p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr, - dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan, - sc->iscan); - } - break; - case 1: - av1_quantize_fp_32x32(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX, - p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr, - dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan, - sc->iscan); - break; - case 2: - av1_quantize_fp_64x64(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX, - p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr, - dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan, - sc->iscan); - break; - default: assert(0); - } - } -} - -void av1_quantize_b_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, - const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, - const SCAN_ORDER *sc, const QUANT_PARAM *qparam) { - const qm_val_t *qm_ptr = qparam->qmatrix; - const qm_val_t *iqm_ptr = qparam->iqmatrix; - if (qm_ptr != NULL && iqm_ptr != NULL) { - quantize_b_helper_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, - p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr, - dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan, - sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale); - } else { - switch (qparam->log_scale) { - case 0: - aom_quantize_b(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, - p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr, - dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan, - sc->iscan); - break; - case 1: - aom_quantize_b_32x32(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, - p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr, - dqcoeff_ptr, 
p->dequant_QTX, eob_ptr, sc->scan, - sc->iscan); - break; - case 2: - aom_quantize_b_64x64(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, - p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr, - dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan, - sc->iscan); - break; - default: assert(0); - } - } -} - -static void quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, - int skip_block, const int16_t *round_ptr, - const int16_t quant, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, - uint16_t *eob_ptr, const qm_val_t *qm_ptr, - const qm_val_t *iqm_ptr, const int log_scale) { - const int rc = 0; - const int coeff = coeff_ptr[rc]; - const int coeff_sign = (coeff >> 31); - const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - int64_t tmp; - int eob = -1; - int32_t tmp32; - int dequant; - - memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); - - if (!skip_block) { - const int wt = qm_ptr != NULL ? qm_ptr[rc] : (1 << AOM_QM_BITS); - const int iwt = iqm_ptr != NULL ? 
iqm_ptr[rc] : (1 << AOM_QM_BITS); - tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], log_scale), - INT16_MIN, INT16_MAX); - tmp32 = (int32_t)((tmp * wt * quant) >> (16 - log_scale + AOM_QM_BITS)); - qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign; - dequant = (dequant_ptr * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; - const tran_low_t abs_dqcoeff = (tmp32 * dequant) >> log_scale; - dqcoeff_ptr[rc] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign); - if (tmp32) eob = 0; - } - *eob_ptr = eob + 1; -} - -void av1_quantize_dc_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, - const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, - const SCAN_ORDER *sc, const QUANT_PARAM *qparam) { - // obsolete skip_block - const int skip_block = 0; - (void)sc; - assert(qparam->log_scale >= 0 && qparam->log_scale < (3)); - const qm_val_t *qm_ptr = qparam->qmatrix; - const qm_val_t *iqm_ptr = qparam->iqmatrix; - quantize_dc(coeff_ptr, (int)n_coeffs, skip_block, p->round_QTX, - p->quant_fp_QTX[0], qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX[0], - eob_ptr, qm_ptr, iqm_ptr, qparam->log_scale); -} - -void av1_highbd_quantize_fp_facade(const tran_low_t *coeff_ptr, - intptr_t n_coeffs, const MACROBLOCK_PLANE *p, - tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, - const SCAN_ORDER *sc, - const QUANT_PARAM *qparam) { - const qm_val_t *qm_ptr = qparam->qmatrix; - const qm_val_t *iqm_ptr = qparam->iqmatrix; - if (qm_ptr != NULL && iqm_ptr != NULL) { - highbd_quantize_fp_helper_c( - coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX, p->quant_fp_QTX, - p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr, - sc->scan, sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale); - } else { - if (n_coeffs < 16) { - // TODO(jingning): Need SIMD implementation for smaller block size - // quantization. 
- av1_highbd_quantize_fp_c( - coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX, p->quant_fp_QTX, - p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, eob_ptr, - sc->scan, sc->iscan, qparam->log_scale); - return; - } - av1_highbd_quantize_fp(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_fp_QTX, - p->quant_fp_QTX, p->quant_shift_QTX, qcoeff_ptr, - dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan, - sc->iscan, qparam->log_scale); - } -} - -void av1_highbd_quantize_b_facade(const tran_low_t *coeff_ptr, - intptr_t n_coeffs, const MACROBLOCK_PLANE *p, - tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, - const SCAN_ORDER *sc, - const QUANT_PARAM *qparam) { - const qm_val_t *qm_ptr = qparam->qmatrix; - const qm_val_t *iqm_ptr = qparam->iqmatrix; - if (qm_ptr != NULL && iqm_ptr != NULL) { - highbd_quantize_b_helper_c(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, - p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr, - dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan, - sc->iscan, qm_ptr, iqm_ptr, qparam->log_scale); - } else { - switch (qparam->log_scale) { - case 0: - if (LIKELY(n_coeffs >= 8)) { - aom_highbd_quantize_b(coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, - p->quant_QTX, p->quant_shift_QTX, qcoeff_ptr, - dqcoeff_ptr, p->dequant_QTX, eob_ptr, sc->scan, - sc->iscan); - } else { - // TODO(luoyi): Need SIMD (e.g. 
sse2) for smaller block size - // quantization - aom_highbd_quantize_b_c(coeff_ptr, n_coeffs, p->zbin_QTX, - p->round_QTX, p->quant_QTX, - p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, - p->dequant_QTX, eob_ptr, sc->scan, sc->iscan); - } - break; - case 1: - aom_highbd_quantize_b_32x32( - coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX, - p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, - eob_ptr, sc->scan, sc->iscan); - break; - case 2: - aom_highbd_quantize_b_64x64( - coeff_ptr, n_coeffs, p->zbin_QTX, p->round_QTX, p->quant_QTX, - p->quant_shift_QTX, qcoeff_ptr, dqcoeff_ptr, p->dequant_QTX, - eob_ptr, sc->scan, sc->iscan); - break; - default: assert(0); - } - } -} - -static INLINE void highbd_quantize_dc( - const tran_low_t *coeff_ptr, int n_coeffs, int skip_block, - const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, const int16_t dequant_ptr, uint16_t *eob_ptr, - const qm_val_t *qm_ptr, const qm_val_t *iqm_ptr, const int log_scale) { - int eob = -1; - - memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); - memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); - - if (!skip_block) { - const qm_val_t wt = qm_ptr != NULL ? qm_ptr[0] : (1 << AOM_QM_BITS); - const qm_val_t iwt = iqm_ptr != NULL ? 
iqm_ptr[0] : (1 << AOM_QM_BITS); - const int coeff = coeff_ptr[0]; - const int coeff_sign = (coeff >> 31); - const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; - const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], log_scale); - const int64_t tmpw = tmp * wt; - const int abs_qcoeff = - (int)((tmpw * quant) >> (16 - log_scale + AOM_QM_BITS)); - qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); - const int dequant = - (dequant_ptr * iwt + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; - - const tran_low_t abs_dqcoeff = (abs_qcoeff * dequant) >> log_scale; - dqcoeff_ptr[0] = (tran_low_t)((abs_dqcoeff ^ coeff_sign) - coeff_sign); - if (abs_qcoeff) eob = 0; - } - *eob_ptr = eob + 1; -} - -void av1_highbd_quantize_dc_facade(const tran_low_t *coeff_ptr, - intptr_t n_coeffs, const MACROBLOCK_PLANE *p, - tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, - const SCAN_ORDER *sc, - const QUANT_PARAM *qparam) { - // obsolete skip_block - const int skip_block = 0; - const qm_val_t *qm_ptr = qparam->qmatrix; - const qm_val_t *iqm_ptr = qparam->iqmatrix; - (void)sc; - - highbd_quantize_dc(coeff_ptr, (int)n_coeffs, skip_block, p->round_QTX, - p->quant_fp_QTX[0], qcoeff_ptr, dqcoeff_ptr, - p->dequant_QTX[0], eob_ptr, qm_ptr, iqm_ptr, - qparam->log_scale); -} - -void av1_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t count, - const int16_t *zbin_ptr, const int16_t *round_ptr, - const int16_t *quant_ptr, - const int16_t *quant_shift_ptr, - tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, - const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan, const int16_t *iscan, - int log_scale) { - highbd_quantize_fp_helper_c(coeff_ptr, count, zbin_ptr, round_ptr, quant_ptr, - quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, - dequant_ptr, eob_ptr, scan, iscan, NULL, NULL, - log_scale); -} - -static void invert_quant(int16_t *quant, int16_t *shift, int d) { - uint32_t t; - int l, m; - t = d; - for (l = 0; t > 1; l++) t >>= 1; 
- m = 1 + (1 << (16 + l)) / d; - *quant = (int16_t)(m - (1 << 16)); - *shift = 1 << (16 - l); -} - -static int get_qzbin_factor(int q, aom_bit_depth_t bit_depth) { - const int quant = av1_dc_quant_Q3(q, 0, bit_depth); - switch (bit_depth) { - case AOM_BITS_8: return q == 0 ? 64 : (quant < 148 ? 84 : 80); - case AOM_BITS_10: return q == 0 ? 64 : (quant < 592 ? 84 : 80); - case AOM_BITS_12: return q == 0 ? 64 : (quant < 2368 ? 84 : 80); - default: - assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12"); - return -1; - } -} - -void av1_build_quantizer(aom_bit_depth_t bit_depth, int y_dc_delta_q, - int u_dc_delta_q, int u_ac_delta_q, int v_dc_delta_q, - int v_ac_delta_q, QUANTS *const quants, - Dequants *const deq) { - int i, q, quant_Q3, quant_QTX; - - for (q = 0; q < QINDEX_RANGE; q++) { - const int qzbin_factor = get_qzbin_factor(q, bit_depth); - const int qrounding_factor = q == 0 ? 64 : 48; - - for (i = 0; i < 2; ++i) { - int qrounding_factor_fp = 64; - // y quantizer setup with original coeff shift of Q3 - quant_Q3 = i == 0 ? av1_dc_quant_Q3(q, y_dc_delta_q, bit_depth) - : av1_ac_quant_Q3(q, 0, bit_depth); - // y quantizer with TX scale - quant_QTX = i == 0 ? av1_dc_quant_QTX(q, y_dc_delta_q, bit_depth) - : av1_ac_quant_QTX(q, 0, bit_depth); - invert_quant(&quants->y_quant[q][i], &quants->y_quant_shift[q][i], - quant_QTX); - quants->y_quant_fp[q][i] = (1 << 16) / quant_QTX; - quants->y_round_fp[q][i] = (qrounding_factor_fp * quant_QTX) >> 7; - quants->y_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant_QTX, 7); - quants->y_round[q][i] = (qrounding_factor * quant_QTX) >> 7; - deq->y_dequant_QTX[q][i] = quant_QTX; - deq->y_dequant_Q3[q][i] = quant_Q3; - - // u quantizer setup with original coeff shift of Q3 - quant_Q3 = i == 0 ? av1_dc_quant_Q3(q, u_dc_delta_q, bit_depth) - : av1_ac_quant_Q3(q, u_ac_delta_q, bit_depth); - // u quantizer with TX scale - quant_QTX = i == 0 ? 
av1_dc_quant_QTX(q, u_dc_delta_q, bit_depth) - : av1_ac_quant_QTX(q, u_ac_delta_q, bit_depth); - invert_quant(&quants->u_quant[q][i], &quants->u_quant_shift[q][i], - quant_QTX); - quants->u_quant_fp[q][i] = (1 << 16) / quant_QTX; - quants->u_round_fp[q][i] = (qrounding_factor_fp * quant_QTX) >> 7; - quants->u_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant_QTX, 7); - quants->u_round[q][i] = (qrounding_factor * quant_QTX) >> 7; - deq->u_dequant_QTX[q][i] = quant_QTX; - deq->u_dequant_Q3[q][i] = quant_Q3; - - // v quantizer setup with original coeff shift of Q3 - quant_Q3 = i == 0 ? av1_dc_quant_Q3(q, v_dc_delta_q, bit_depth) - : av1_ac_quant_Q3(q, v_ac_delta_q, bit_depth); - // v quantizer with TX scale - quant_QTX = i == 0 ? av1_dc_quant_QTX(q, v_dc_delta_q, bit_depth) - : av1_ac_quant_QTX(q, v_ac_delta_q, bit_depth); - invert_quant(&quants->v_quant[q][i], &quants->v_quant_shift[q][i], - quant_QTX); - quants->v_quant_fp[q][i] = (1 << 16) / quant_QTX; - quants->v_round_fp[q][i] = (qrounding_factor_fp * quant_QTX) >> 7; - quants->v_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant_QTX, 7); - quants->v_round[q][i] = (qrounding_factor * quant_QTX) >> 7; - deq->v_dequant_QTX[q][i] = quant_QTX; - deq->v_dequant_Q3[q][i] = quant_Q3; - } - - for (i = 2; i < 8; i++) { // 8: SIMD width - quants->y_quant[q][i] = quants->y_quant[q][1]; - quants->y_quant_fp[q][i] = quants->y_quant_fp[q][1]; - quants->y_round_fp[q][i] = quants->y_round_fp[q][1]; - quants->y_quant_shift[q][i] = quants->y_quant_shift[q][1]; - quants->y_zbin[q][i] = quants->y_zbin[q][1]; - quants->y_round[q][i] = quants->y_round[q][1]; - deq->y_dequant_QTX[q][i] = deq->y_dequant_QTX[q][1]; - deq->y_dequant_Q3[q][i] = deq->y_dequant_Q3[q][1]; - - quants->u_quant[q][i] = quants->u_quant[q][1]; - quants->u_quant_fp[q][i] = quants->u_quant_fp[q][1]; - quants->u_round_fp[q][i] = quants->u_round_fp[q][1]; - quants->u_quant_shift[q][i] = quants->u_quant_shift[q][1]; - quants->u_zbin[q][i] = quants->u_zbin[q][1]; - 
quants->u_round[q][i] = quants->u_round[q][1]; - deq->u_dequant_QTX[q][i] = deq->u_dequant_QTX[q][1]; - deq->u_dequant_Q3[q][i] = deq->u_dequant_Q3[q][1]; - quants->v_quant[q][i] = quants->u_quant[q][1]; - quants->v_quant_fp[q][i] = quants->v_quant_fp[q][1]; - quants->v_round_fp[q][i] = quants->v_round_fp[q][1]; - quants->v_quant_shift[q][i] = quants->v_quant_shift[q][1]; - quants->v_zbin[q][i] = quants->v_zbin[q][1]; - quants->v_round[q][i] = quants->v_round[q][1]; - deq->v_dequant_QTX[q][i] = deq->v_dequant_QTX[q][1]; - deq->v_dequant_Q3[q][i] = deq->v_dequant_Q3[q][1]; - } - } -} - -void av1_init_quantizer(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - QUANTS *const quants = &cpi->quants; - Dequants *const dequants = &cpi->dequants; - av1_build_quantizer(cm->seq_params.bit_depth, cm->y_dc_delta_q, - cm->u_dc_delta_q, cm->u_ac_delta_q, cm->v_dc_delta_q, - cm->v_ac_delta_q, quants, dequants); -} - -void av1_init_plane_quantizers(const AV1_COMP *cpi, MACROBLOCK *x, - int segment_id) { - const AV1_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &x->e_mbd; - const QUANTS *const quants = &cpi->quants; - - int current_qindex = AOMMAX( - 0, AOMMIN(QINDEX_RANGE - 1, cpi->oxcf.deltaq_mode != NO_DELTA_Q - ? cm->base_qindex + xd->delta_qindex - : cm->base_qindex)); - const int qindex = av1_get_qindex(&cm->seg, segment_id, current_qindex); - const int rdmult = av1_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q); - int qmlevel = (xd->lossless[segment_id] || cm->using_qmatrix == 0) - ? 
NUM_QM_LEVELS - 1 - : cm->qm_y; - - // Y - x->plane[0].quant_QTX = quants->y_quant[qindex]; - x->plane[0].quant_fp_QTX = quants->y_quant_fp[qindex]; - x->plane[0].round_fp_QTX = quants->y_round_fp[qindex]; - x->plane[0].quant_shift_QTX = quants->y_quant_shift[qindex]; - x->plane[0].zbin_QTX = quants->y_zbin[qindex]; - x->plane[0].round_QTX = quants->y_round[qindex]; - x->plane[0].dequant_QTX = cpi->dequants.y_dequant_QTX[qindex]; - memcpy(&xd->plane[0].seg_qmatrix[segment_id], cm->gqmatrix[qmlevel][0], - sizeof(cm->gqmatrix[qmlevel][0])); - memcpy(&xd->plane[0].seg_iqmatrix[segment_id], cm->giqmatrix[qmlevel][0], - sizeof(cm->giqmatrix[qmlevel][0])); - xd->plane[0].dequant_Q3 = cpi->dequants.y_dequant_Q3[qindex]; - - // U - qmlevel = (xd->lossless[segment_id] || cm->using_qmatrix == 0) - ? NUM_QM_LEVELS - 1 - : cm->qm_u; - { - x->plane[1].quant_QTX = quants->u_quant[qindex]; - x->plane[1].quant_fp_QTX = quants->u_quant_fp[qindex]; - x->plane[1].round_fp_QTX = quants->u_round_fp[qindex]; - x->plane[1].quant_shift_QTX = quants->u_quant_shift[qindex]; - x->plane[1].zbin_QTX = quants->u_zbin[qindex]; - x->plane[1].round_QTX = quants->u_round[qindex]; - x->plane[1].dequant_QTX = cpi->dequants.u_dequant_QTX[qindex]; - memcpy(&xd->plane[1].seg_qmatrix[segment_id], cm->gqmatrix[qmlevel][1], - sizeof(cm->gqmatrix[qmlevel][1])); - memcpy(&xd->plane[1].seg_iqmatrix[segment_id], cm->giqmatrix[qmlevel][1], - sizeof(cm->giqmatrix[qmlevel][1])); - x->plane[1].dequant_QTX = cpi->dequants.u_dequant_QTX[qindex]; - xd->plane[1].dequant_Q3 = cpi->dequants.u_dequant_Q3[qindex]; - } - // V - qmlevel = (xd->lossless[segment_id] || cm->using_qmatrix == 0) - ? 
NUM_QM_LEVELS - 1 - : cm->qm_v; - { - x->plane[2].quant_QTX = quants->v_quant[qindex]; - x->plane[2].quant_fp_QTX = quants->v_quant_fp[qindex]; - x->plane[2].round_fp_QTX = quants->v_round_fp[qindex]; - x->plane[2].quant_shift_QTX = quants->v_quant_shift[qindex]; - x->plane[2].zbin_QTX = quants->v_zbin[qindex]; - x->plane[2].round_QTX = quants->v_round[qindex]; - x->plane[2].dequant_QTX = cpi->dequants.v_dequant_QTX[qindex]; - memcpy(&xd->plane[2].seg_qmatrix[segment_id], cm->gqmatrix[qmlevel][2], - sizeof(cm->gqmatrix[qmlevel][2])); - memcpy(&xd->plane[2].seg_iqmatrix[segment_id], cm->giqmatrix[qmlevel][2], - sizeof(cm->giqmatrix[qmlevel][2])); - x->plane[2].dequant_QTX = cpi->dequants.v_dequant_QTX[qindex]; - xd->plane[2].dequant_Q3 = cpi->dequants.v_dequant_Q3[qindex]; - } - x->skip_block = segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP); - x->qindex = qindex; - - set_error_per_bit(x, rdmult); - - av1_initialize_me_consts(cpi, x, qindex); -} - -void av1_frame_init_quantizer(AV1_COMP *cpi) { - MACROBLOCK *const x = &cpi->td.mb; - MACROBLOCKD *const xd = &x->e_mbd; - av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id); -} - -void av1_set_quantizer(AV1_COMMON *cm, int q) { - // quantizer has to be reinitialized with av1_init_quantizer() if any - // delta_q changes. - cm->base_qindex = AOMMAX(cm->delta_q_present_flag, q); - cm->y_dc_delta_q = 0; - cm->u_dc_delta_q = 0; - cm->u_ac_delta_q = 0; - cm->v_dc_delta_q = 0; - cm->v_ac_delta_q = 0; - cm->qm_y = aom_get_qmlevel(cm->base_qindex, cm->min_qmlevel, cm->max_qmlevel); - cm->qm_u = aom_get_qmlevel(cm->base_qindex + cm->u_ac_delta_q, - cm->min_qmlevel, cm->max_qmlevel); - - if (!cm->seq_params.separate_uv_delta_q) - cm->qm_v = cm->qm_u; - else - cm->qm_v = aom_get_qmlevel(cm->base_qindex + cm->v_ac_delta_q, - cm->min_qmlevel, cm->max_qmlevel); -} - -// Table that converts 0-63 Q-range values passed in outside to the Qindex -// range used internally. 
-static const int quantizer_to_qindex[] = { - 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, - 52, 56, 60, 64, 68, 72, 76, 80, 84, 88, 92, 96, 100, - 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152, - 156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204, - 208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 249, 255, -}; - -int av1_quantizer_to_qindex(int quantizer) { - return quantizer_to_qindex[quantizer]; -} - -int av1_qindex_to_quantizer(int qindex) { - int quantizer; - - for (quantizer = 0; quantizer < 64; ++quantizer) - if (quantizer_to_qindex[quantizer] >= qindex) return quantizer; - - return 63; -} diff --git a/third_party/aom/av1/encoder/av1_quantize.h b/third_party/aom/av1/encoder/av1_quantize.h deleted file mode 100644 index 35af9a67a..000000000 --- a/third_party/aom/av1/encoder/av1_quantize.h +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_ENCODER_AV1_QUANTIZE_H_ -#define AOM_AV1_ENCODER_AV1_QUANTIZE_H_ - -#include "config/aom_config.h" - -#include "av1/common/quant_common.h" -#include "av1/common/scan.h" -#include "av1/encoder/block.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct QUANT_PARAM { - int log_scale; - TX_SIZE tx_size; - const qm_val_t *qmatrix; - const qm_val_t *iqmatrix; -} QUANT_PARAM; - -typedef void (*AV1_QUANT_FACADE)(const tran_low_t *coeff_ptr, intptr_t n_coeffs, - const MACROBLOCK_PLANE *p, - tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, - const SCAN_ORDER *sc, - const QUANT_PARAM *qparam); - -// The QUANTS structure is used only for internal quantizer setup in -// av1_quantize.c. -// All of its fields use the same coefficient shift/scaling at TX. -typedef struct { - // 0: dc 1: ac 2-8: ac repeated to SIMD width - DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, y_zbin[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, y_round[QINDEX_RANGE][8]); - - // TODO(jingning): in progress of re-working the quantization. will decide - // if we want to deprecate the current use of y_quant. 
- DECLARE_ALIGNED(16, int16_t, y_quant_fp[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, u_quant_fp[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, v_quant_fp[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, y_round_fp[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, u_round_fp[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, v_round_fp[QINDEX_RANGE][8]); - - DECLARE_ALIGNED(16, int16_t, u_quant[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, v_quant[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, u_quant_shift[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, v_quant_shift[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, u_zbin[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, v_zbin[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, u_round[QINDEX_RANGE][8]); - DECLARE_ALIGNED(16, int16_t, v_round[QINDEX_RANGE][8]); -} QUANTS; - -// The Dequants structure is used only for internal quantizer setup in -// av1_quantize.c. -// Fields are sufffixed according to whether or not they're expressed in -// the same coefficient shift/precision as TX or a fixed Q3 format. 
-typedef struct { - DECLARE_ALIGNED(16, int16_t, - y_dequant_QTX[QINDEX_RANGE][8]); // 8: SIMD width - DECLARE_ALIGNED(16, int16_t, - u_dequant_QTX[QINDEX_RANGE][8]); // 8: SIMD width - DECLARE_ALIGNED(16, int16_t, - v_dequant_QTX[QINDEX_RANGE][8]); // 8: SIMD width - DECLARE_ALIGNED(16, int16_t, y_dequant_Q3[QINDEX_RANGE][8]); // 8: SIMD width - DECLARE_ALIGNED(16, int16_t, u_dequant_Q3[QINDEX_RANGE][8]); // 8: SIMD width - DECLARE_ALIGNED(16, int16_t, v_dequant_Q3[QINDEX_RANGE][8]); // 8: SIMD width -} Dequants; - -struct AV1_COMP; -struct AV1Common; - -void av1_frame_init_quantizer(struct AV1_COMP *cpi); - -void av1_init_plane_quantizers(const struct AV1_COMP *cpi, MACROBLOCK *x, - int segment_id); - -void av1_build_quantizer(aom_bit_depth_t bit_depth, int y_dc_delta_q, - int u_dc_delta_q, int u_ac_delta_q, int v_dc_delta_q, - int v_ac_delta_q, QUANTS *const quants, - Dequants *const deq); - -void av1_init_quantizer(struct AV1_COMP *cpi); - -void av1_set_quantizer(struct AV1Common *cm, int q); - -int av1_quantizer_to_qindex(int quantizer); - -int av1_qindex_to_quantizer(int qindex); - -void av1_quantize_skip(intptr_t n_coeffs, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr); - -void av1_quantize_fp_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, - const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, - const SCAN_ORDER *sc, const QUANT_PARAM *qparam); - -void av1_quantize_b_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, - const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, - const SCAN_ORDER *sc, const QUANT_PARAM *qparam); - -void av1_quantize_dc_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs, - const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, - const SCAN_ORDER *sc, const QUANT_PARAM *qparam); - -void av1_highbd_quantize_fp_facade(const tran_low_t *coeff_ptr, - intptr_t n_coeffs, 
const MACROBLOCK_PLANE *p, - tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, - const SCAN_ORDER *sc, - const QUANT_PARAM *qparam); - -void av1_highbd_quantize_b_facade(const tran_low_t *coeff_ptr, - intptr_t n_coeffs, const MACROBLOCK_PLANE *p, - tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, - const SCAN_ORDER *sc, - const QUANT_PARAM *qparam); - -void av1_highbd_quantize_dc_facade(const tran_low_t *coeff_ptr, - intptr_t n_coeffs, const MACROBLOCK_PLANE *p, - tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr, - const SCAN_ORDER *sc, - const QUANT_PARAM *qparam); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_AV1_QUANTIZE_H_ diff --git a/third_party/aom/av1/encoder/bitstream.c b/third_party/aom/av1/encoder/bitstream.c deleted file mode 100644 index 2c4acdb02..000000000 --- a/third_party/aom/av1/encoder/bitstream.c +++ /dev/null @@ -1,3999 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include -#include -#include - -#include "aom/aom_encoder.h" -#include "aom_dsp/aom_dsp_common.h" -#include "aom_dsp/binary_codes_writer.h" -#include "aom_dsp/bitwriter_buffer.h" -#include "aom_mem/aom_mem.h" -#include "aom_ports/bitops.h" -#include "aom_ports/mem_ops.h" -#include "aom_ports/system_state.h" -#if CONFIG_BITSTREAM_DEBUG -#include "aom_util/debug_util.h" -#endif // CONFIG_BITSTREAM_DEBUG - -#include "av1/common/cdef.h" -#include "av1/common/cfl.h" -#include "av1/common/entropy.h" -#include "av1/common/entropymode.h" -#include "av1/common/entropymv.h" -#include "av1/common/mvref_common.h" -#include "av1/common/pred_common.h" -#include "av1/common/reconinter.h" -#include "av1/common/reconintra.h" -#include "av1/common/seg_common.h" -#include "av1/common/tile_common.h" - -#include "av1/encoder/bitstream.h" -#include "av1/encoder/cost.h" -#include "av1/encoder/encodemv.h" -#include "av1/encoder/encodetxb.h" -#include "av1/encoder/mcomp.h" -#include "av1/encoder/palette.h" -#include "av1/encoder/segmentation.h" -#include "av1/encoder/tokenize.h" - -#define ENC_MISMATCH_DEBUG 0 - -static INLINE void write_uniform(aom_writer *w, int n, int v) { - const int l = get_unsigned_bits(n); - const int m = (1 << l) - n; - if (l == 0) return; - if (v < m) { - aom_write_literal(w, v, l - 1); - } else { - aom_write_literal(w, m + ((v - m) >> 1), l - 1); - aom_write_literal(w, (v - m) & 1, 1); - } -} - -static void loop_restoration_write_sb_coeffs(const AV1_COMMON *const cm, - MACROBLOCKD *xd, - const RestorationUnitInfo *rui, - aom_writer *const w, int plane, - FRAME_COUNTS *counts); - -static void write_intra_y_mode_kf(FRAME_CONTEXT *frame_ctx, - const MB_MODE_INFO *mi, - const MB_MODE_INFO *above_mi, - const MB_MODE_INFO *left_mi, - PREDICTION_MODE mode, aom_writer *w) { - assert(!is_intrabc_block(mi)); - (void)mi; - aom_write_symbol(w, mode, get_y_mode_cdf(frame_ctx, above_mi, left_mi), - INTRA_MODES); -} - -static void write_inter_mode(aom_writer *w, 
PREDICTION_MODE mode, - FRAME_CONTEXT *ec_ctx, const int16_t mode_ctx) { - const int16_t newmv_ctx = mode_ctx & NEWMV_CTX_MASK; - - aom_write_symbol(w, mode != NEWMV, ec_ctx->newmv_cdf[newmv_ctx], 2); - - if (mode != NEWMV) { - const int16_t zeromv_ctx = - (mode_ctx >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK; - aom_write_symbol(w, mode != GLOBALMV, ec_ctx->zeromv_cdf[zeromv_ctx], 2); - - if (mode != GLOBALMV) { - int16_t refmv_ctx = (mode_ctx >> REFMV_OFFSET) & REFMV_CTX_MASK; - aom_write_symbol(w, mode != NEARESTMV, ec_ctx->refmv_cdf[refmv_ctx], 2); - } - } -} - -static void write_drl_idx(FRAME_CONTEXT *ec_ctx, const MB_MODE_INFO *mbmi, - const MB_MODE_INFO_EXT *mbmi_ext, aom_writer *w) { - uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame); - - assert(mbmi->ref_mv_idx < 3); - - const int new_mv = mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV; - if (new_mv) { - int idx; - for (idx = 0; idx < 2; ++idx) { - if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) { - uint8_t drl_ctx = - av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx); - - aom_write_symbol(w, mbmi->ref_mv_idx != idx, ec_ctx->drl_cdf[drl_ctx], - 2); - if (mbmi->ref_mv_idx == idx) return; - } - } - return; - } - - if (have_nearmv_in_inter_mode(mbmi->mode)) { - int idx; - // TODO(jingning): Temporary solution to compensate the NEARESTMV offset. 
- for (idx = 1; idx < 3; ++idx) { - if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) { - uint8_t drl_ctx = - av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx); - aom_write_symbol(w, mbmi->ref_mv_idx != (idx - 1), - ec_ctx->drl_cdf[drl_ctx], 2); - if (mbmi->ref_mv_idx == (idx - 1)) return; - } - } - return; - } -} - -static void write_inter_compound_mode(MACROBLOCKD *xd, aom_writer *w, - PREDICTION_MODE mode, - const int16_t mode_ctx) { - assert(is_inter_compound_mode(mode)); - aom_write_symbol(w, INTER_COMPOUND_OFFSET(mode), - xd->tile_ctx->inter_compound_mode_cdf[mode_ctx], - INTER_COMPOUND_MODES); -} - -static void write_tx_size_vartx(MACROBLOCKD *xd, const MB_MODE_INFO *mbmi, - TX_SIZE tx_size, int depth, int blk_row, - int blk_col, aom_writer *w) { - FRAME_CONTEXT *ec_ctx = xd->tile_ctx; - const int max_blocks_high = max_block_high(xd, mbmi->sb_type, 0); - const int max_blocks_wide = max_block_wide(xd, mbmi->sb_type, 0); - - if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return; - - if (depth == MAX_VARTX_DEPTH) { - txfm_partition_update(xd->above_txfm_context + blk_col, - xd->left_txfm_context + blk_row, tx_size, tx_size); - return; - } - - const int ctx = txfm_partition_context(xd->above_txfm_context + blk_col, - xd->left_txfm_context + blk_row, - mbmi->sb_type, tx_size); - const int txb_size_index = - av1_get_txb_size_index(mbmi->sb_type, blk_row, blk_col); - const int write_txfm_partition = - tx_size == mbmi->inter_tx_size[txb_size_index]; - if (write_txfm_partition) { - aom_write_symbol(w, 0, ec_ctx->txfm_partition_cdf[ctx], 2); - - txfm_partition_update(xd->above_txfm_context + blk_col, - xd->left_txfm_context + blk_row, tx_size, tx_size); - // TODO(yuec): set correct txfm partition update for qttx - } else { - const TX_SIZE sub_txs = sub_tx_size_map[tx_size]; - const int bsw = tx_size_wide_unit[sub_txs]; - const int bsh = tx_size_high_unit[sub_txs]; - - aom_write_symbol(w, 1, ec_ctx->txfm_partition_cdf[ctx], 2); - - if (sub_txs 
== TX_4X4) { - txfm_partition_update(xd->above_txfm_context + blk_col, - xd->left_txfm_context + blk_row, sub_txs, tx_size); - return; - } - - assert(bsw > 0 && bsh > 0); - for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) - for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) { - int offsetr = blk_row + row; - int offsetc = blk_col + col; - write_tx_size_vartx(xd, mbmi, sub_txs, depth + 1, offsetr, offsetc, w); - } - } -} - -static void write_selected_tx_size(const MACROBLOCKD *xd, aom_writer *w) { - const MB_MODE_INFO *const mbmi = xd->mi[0]; - const BLOCK_SIZE bsize = mbmi->sb_type; - FRAME_CONTEXT *ec_ctx = xd->tile_ctx; - if (block_signals_txsize(bsize)) { - const TX_SIZE tx_size = mbmi->tx_size; - const int tx_size_ctx = get_tx_size_context(xd); - const int depth = tx_size_to_depth(tx_size, bsize); - const int max_depths = bsize_to_max_depth(bsize); - const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize); - - assert(depth >= 0 && depth <= max_depths); - assert(!is_inter_block(mbmi)); - assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed(xd, mbmi))); - - aom_write_symbol(w, depth, ec_ctx->tx_size_cdf[tx_size_cat][tx_size_ctx], - max_depths + 1); - } -} - -static int write_skip(const AV1_COMMON *cm, const MACROBLOCKD *xd, - int segment_id, const MB_MODE_INFO *mi, aom_writer *w) { - if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) { - return 1; - } else { - const int skip = mi->skip; - const int ctx = av1_get_skip_context(xd); - FRAME_CONTEXT *ec_ctx = xd->tile_ctx; - aom_write_symbol(w, skip, ec_ctx->skip_cdfs[ctx], 2); - return skip; - } -} - -static int write_skip_mode(const AV1_COMMON *cm, const MACROBLOCKD *xd, - int segment_id, const MB_MODE_INFO *mi, - aom_writer *w) { - if (!cm->skip_mode_flag) return 0; - if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) { - return 0; - } - const int skip_mode = mi->skip_mode; - if (!is_comp_ref_allowed(mi->sb_type)) { - assert(!skip_mode); - return 0; - } - if 
(segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME) || - segfeature_active(&cm->seg, segment_id, SEG_LVL_GLOBALMV)) { - // These features imply single-reference mode, while skip mode implies - // compound reference. Hence, the two are mutually exclusive. - // In other words, skip_mode is implicitly 0 here. - assert(!skip_mode); - return 0; - } - const int ctx = av1_get_skip_mode_context(xd); - aom_write_symbol(w, skip_mode, xd->tile_ctx->skip_mode_cdfs[ctx], 2); - return skip_mode; -} - -static void write_is_inter(const AV1_COMMON *cm, const MACROBLOCKD *xd, - int segment_id, aom_writer *w, const int is_inter) { - if (!segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) { - if (segfeature_active(&cm->seg, segment_id, SEG_LVL_GLOBALMV)) { - assert(is_inter); - return; - } - const int ctx = av1_get_intra_inter_context(xd); - FRAME_CONTEXT *ec_ctx = xd->tile_ctx; - aom_write_symbol(w, is_inter, ec_ctx->intra_inter_cdf[ctx], 2); - } -} - -static void write_motion_mode(const AV1_COMMON *cm, MACROBLOCKD *xd, - const MB_MODE_INFO *mbmi, aom_writer *w) { - MOTION_MODE last_motion_mode_allowed = - cm->switchable_motion_mode - ? motion_mode_allowed(cm->global_motion, xd, mbmi, - cm->allow_warped_motion) - : SIMPLE_TRANSLATION; - assert(mbmi->motion_mode <= last_motion_mode_allowed); - switch (last_motion_mode_allowed) { - case SIMPLE_TRANSLATION: break; - case OBMC_CAUSAL: - aom_write_symbol(w, mbmi->motion_mode == OBMC_CAUSAL, - xd->tile_ctx->obmc_cdf[mbmi->sb_type], 2); - break; - default: - aom_write_symbol(w, mbmi->motion_mode, - xd->tile_ctx->motion_mode_cdf[mbmi->sb_type], - MOTION_MODES); - } -} - -static void write_delta_qindex(const MACROBLOCKD *xd, int delta_qindex, - aom_writer *w) { - int sign = delta_qindex < 0; - int abs = sign ? -delta_qindex : delta_qindex; - int rem_bits, thr; - int smallval = abs < DELTA_Q_SMALL ? 
1 : 0; - FRAME_CONTEXT *ec_ctx = xd->tile_ctx; - - aom_write_symbol(w, AOMMIN(abs, DELTA_Q_SMALL), ec_ctx->delta_q_cdf, - DELTA_Q_PROBS + 1); - - if (!smallval) { - rem_bits = get_msb(abs - 1); - thr = (1 << rem_bits) + 1; - aom_write_literal(w, rem_bits - 1, 3); - aom_write_literal(w, abs - thr, rem_bits); - } - if (abs > 0) { - aom_write_bit(w, sign); - } -} - -static void write_delta_lflevel(const AV1_COMMON *cm, const MACROBLOCKD *xd, - int lf_id, int delta_lflevel, aom_writer *w) { - int sign = delta_lflevel < 0; - int abs = sign ? -delta_lflevel : delta_lflevel; - int rem_bits, thr; - int smallval = abs < DELTA_LF_SMALL ? 1 : 0; - FRAME_CONTEXT *ec_ctx = xd->tile_ctx; - - if (cm->delta_lf_multi) { - assert(lf_id >= 0 && lf_id < (av1_num_planes(cm) > 1 ? FRAME_LF_COUNT - : FRAME_LF_COUNT - 2)); - aom_write_symbol(w, AOMMIN(abs, DELTA_LF_SMALL), - ec_ctx->delta_lf_multi_cdf[lf_id], DELTA_LF_PROBS + 1); - } else { - aom_write_symbol(w, AOMMIN(abs, DELTA_LF_SMALL), ec_ctx->delta_lf_cdf, - DELTA_LF_PROBS + 1); - } - - if (!smallval) { - rem_bits = get_msb(abs - 1); - thr = (1 << rem_bits) + 1; - aom_write_literal(w, rem_bits - 1, 3); - aom_write_literal(w, abs - thr, rem_bits); - } - if (abs > 0) { - aom_write_bit(w, sign); - } -} - -static void pack_map_tokens(aom_writer *w, const TOKENEXTRA **tp, int n, - int num) { - const TOKENEXTRA *p = *tp; - write_uniform(w, n, p->token); // The first color index. 
- ++p; - --num; - for (int i = 0; i < num; ++i) { - aom_write_symbol(w, p->token, p->color_map_cdf, n); - ++p; - } - *tp = p; -} - -static void pack_txb_tokens(aom_writer *w, AV1_COMMON *cm, MACROBLOCK *const x, - const TOKENEXTRA **tp, - const TOKENEXTRA *const tok_end, MACROBLOCKD *xd, - MB_MODE_INFO *mbmi, int plane, - BLOCK_SIZE plane_bsize, aom_bit_depth_t bit_depth, - int block, int blk_row, int blk_col, - TX_SIZE tx_size, TOKEN_STATS *token_stats) { - const int max_blocks_high = max_block_high(xd, plane_bsize, plane); - const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane); - - if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return; - - const struct macroblockd_plane *const pd = &xd->plane[plane]; - const TX_SIZE plane_tx_size = - plane ? av1_get_max_uv_txsize(mbmi->sb_type, pd->subsampling_x, - pd->subsampling_y) - : mbmi->inter_tx_size[av1_get_txb_size_index(plane_bsize, blk_row, - blk_col)]; - - if (tx_size == plane_tx_size || plane) { - tran_low_t *tcoeff = BLOCK_OFFSET(x->mbmi_ext->tcoeff[plane], block); - const uint16_t eob = x->mbmi_ext->eobs[plane][block]; - TXB_CTX txb_ctx = { x->mbmi_ext->txb_skip_ctx[plane][block], - x->mbmi_ext->dc_sign_ctx[plane][block] }; - av1_write_coeffs_txb(cm, xd, w, blk_row, blk_col, plane, tx_size, tcoeff, - eob, &txb_ctx); -#if CONFIG_RD_DEBUG - TOKEN_STATS tmp_token_stats; - init_token_stats(&tmp_token_stats); - token_stats->txb_coeff_cost_map[blk_row][blk_col] = tmp_token_stats.cost; - token_stats->cost += tmp_token_stats.cost; -#endif - } else { - const TX_SIZE sub_txs = sub_tx_size_map[tx_size]; - const int bsw = tx_size_wide_unit[sub_txs]; - const int bsh = tx_size_high_unit[sub_txs]; - const int step = bsh * bsw; - - assert(bsw > 0 && bsh > 0); - - for (int r = 0; r < tx_size_high_unit[tx_size]; r += bsh) { - for (int c = 0; c < tx_size_wide_unit[tx_size]; c += bsw) { - const int offsetr = blk_row + r; - const int offsetc = blk_col + c; - if (offsetr >= max_blocks_high || offsetc >= 
max_blocks_wide) continue; - pack_txb_tokens(w, cm, x, tp, tok_end, xd, mbmi, plane, plane_bsize, - bit_depth, block, offsetr, offsetc, sub_txs, - token_stats); - block += step; - } - } - } -} - -static INLINE void set_spatial_segment_id(const AV1_COMMON *const cm, - uint8_t *segment_ids, - BLOCK_SIZE bsize, int mi_row, - int mi_col, int segment_id) { - const int mi_offset = mi_row * cm->mi_cols + mi_col; - const int bw = mi_size_wide[bsize]; - const int bh = mi_size_high[bsize]; - const int xmis = AOMMIN(cm->mi_cols - mi_col, bw); - const int ymis = AOMMIN(cm->mi_rows - mi_row, bh); - int x, y; - - for (y = 0; y < ymis; ++y) - for (x = 0; x < xmis; ++x) - segment_ids[mi_offset + y * cm->mi_cols + x] = segment_id; -} - -int av1_neg_interleave(int x, int ref, int max) { - assert(x < max); - const int diff = x - ref; - if (!ref) return x; - if (ref >= (max - 1)) return -x + max - 1; - if (2 * ref < max) { - if (abs(diff) <= ref) { - if (diff > 0) - return (diff << 1) - 1; - else - return ((-diff) << 1); - } - return x; - } else { - if (abs(diff) < (max - ref)) { - if (diff > 0) - return (diff << 1) - 1; - else - return ((-diff) << 1); - } - return (max - x) - 1; - } -} - -static void write_segment_id(AV1_COMP *cpi, const MB_MODE_INFO *const mbmi, - aom_writer *w, const struct segmentation *seg, - struct segmentation_probs *segp, int mi_row, - int mi_col, int skip) { - if (!seg->enabled || !seg->update_map) return; - - AV1_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; - int cdf_num; - const int pred = av1_get_spatial_seg_pred(cm, xd, mi_row, mi_col, &cdf_num); - - if (skip) { - // Still need to transmit tx size for intra blocks even if skip is - // true. Changing segment_id may make the tx size become invalid, e.g - // changing from lossless to lossy. 
- assert(is_inter_block(mbmi) || !cpi->has_lossless_segment); - - set_spatial_segment_id(cm, cm->current_frame_seg_map, mbmi->sb_type, mi_row, - mi_col, pred); - set_spatial_segment_id(cm, cpi->segmentation_map, mbmi->sb_type, mi_row, - mi_col, pred); - /* mbmi is read only but we need to update segment_id */ - ((MB_MODE_INFO *)mbmi)->segment_id = pred; - return; - } - - const int coded_id = - av1_neg_interleave(mbmi->segment_id, pred, seg->last_active_segid + 1); - aom_cdf_prob *pred_cdf = segp->spatial_pred_seg_cdf[cdf_num]; - aom_write_symbol(w, coded_id, pred_cdf, MAX_SEGMENTS); - set_spatial_segment_id(cm, cm->current_frame_seg_map, mbmi->sb_type, mi_row, - mi_col, mbmi->segment_id); -} - -#define WRITE_REF_BIT(bname, pname) \ - aom_write_symbol(w, bname, av1_get_pred_cdf_##pname(xd), 2) - -// This function encodes the reference frame -static void write_ref_frames(const AV1_COMMON *cm, const MACROBLOCKD *xd, - aom_writer *w) { - const MB_MODE_INFO *const mbmi = xd->mi[0]; - const int is_compound = has_second_ref(mbmi); - const int segment_id = mbmi->segment_id; - - // If segment level coding of this signal is disabled... 
- // or the segment allows multiple reference frame options - if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) { - assert(!is_compound); - assert(mbmi->ref_frame[0] == - get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME)); - } else if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP) || - segfeature_active(&cm->seg, segment_id, SEG_LVL_GLOBALMV)) { - assert(!is_compound); - assert(mbmi->ref_frame[0] == LAST_FRAME); - } else { - // does the feature use compound prediction or not - // (if not specified at the frame/segment level) - if (cm->reference_mode == REFERENCE_MODE_SELECT) { - if (is_comp_ref_allowed(mbmi->sb_type)) - aom_write_symbol(w, is_compound, av1_get_reference_mode_cdf(xd), 2); - } else { - assert((!is_compound) == (cm->reference_mode == SINGLE_REFERENCE)); - } - - if (is_compound) { - const COMP_REFERENCE_TYPE comp_ref_type = has_uni_comp_refs(mbmi) - ? UNIDIR_COMP_REFERENCE - : BIDIR_COMP_REFERENCE; - aom_write_symbol(w, comp_ref_type, av1_get_comp_reference_type_cdf(xd), - 2); - - if (comp_ref_type == UNIDIR_COMP_REFERENCE) { - const int bit = mbmi->ref_frame[0] == BWDREF_FRAME; - WRITE_REF_BIT(bit, uni_comp_ref_p); - - if (!bit) { - assert(mbmi->ref_frame[0] == LAST_FRAME); - const int bit1 = mbmi->ref_frame[1] == LAST3_FRAME || - mbmi->ref_frame[1] == GOLDEN_FRAME; - WRITE_REF_BIT(bit1, uni_comp_ref_p1); - if (bit1) { - const int bit2 = mbmi->ref_frame[1] == GOLDEN_FRAME; - WRITE_REF_BIT(bit2, uni_comp_ref_p2); - } - } else { - assert(mbmi->ref_frame[1] == ALTREF_FRAME); - } - - return; - } - - assert(comp_ref_type == BIDIR_COMP_REFERENCE); - - const int bit = (mbmi->ref_frame[0] == GOLDEN_FRAME || - mbmi->ref_frame[0] == LAST3_FRAME); - WRITE_REF_BIT(bit, comp_ref_p); - - if (!bit) { - const int bit1 = mbmi->ref_frame[0] == LAST2_FRAME; - WRITE_REF_BIT(bit1, comp_ref_p1); - } else { - const int bit2 = mbmi->ref_frame[0] == GOLDEN_FRAME; - WRITE_REF_BIT(bit2, comp_ref_p2); - } - - const int bit_bwd = mbmi->ref_frame[1] == 
ALTREF_FRAME; - WRITE_REF_BIT(bit_bwd, comp_bwdref_p); - - if (!bit_bwd) { - WRITE_REF_BIT(mbmi->ref_frame[1] == ALTREF2_FRAME, comp_bwdref_p1); - } - - } else { - const int bit0 = (mbmi->ref_frame[0] <= ALTREF_FRAME && - mbmi->ref_frame[0] >= BWDREF_FRAME); - WRITE_REF_BIT(bit0, single_ref_p1); - - if (bit0) { - const int bit1 = mbmi->ref_frame[0] == ALTREF_FRAME; - WRITE_REF_BIT(bit1, single_ref_p2); - - if (!bit1) { - WRITE_REF_BIT(mbmi->ref_frame[0] == ALTREF2_FRAME, single_ref_p6); - } - } else { - const int bit2 = (mbmi->ref_frame[0] == LAST3_FRAME || - mbmi->ref_frame[0] == GOLDEN_FRAME); - WRITE_REF_BIT(bit2, single_ref_p3); - - if (!bit2) { - const int bit3 = mbmi->ref_frame[0] != LAST_FRAME; - WRITE_REF_BIT(bit3, single_ref_p4); - } else { - const int bit4 = mbmi->ref_frame[0] != LAST3_FRAME; - WRITE_REF_BIT(bit4, single_ref_p5); - } - } - } - } -} - -static void write_filter_intra_mode_info(const AV1_COMMON *cm, - const MACROBLOCKD *xd, - const MB_MODE_INFO *const mbmi, - aom_writer *w) { - if (av1_filter_intra_allowed(cm, mbmi)) { - aom_write_symbol(w, mbmi->filter_intra_mode_info.use_filter_intra, - xd->tile_ctx->filter_intra_cdfs[mbmi->sb_type], 2); - if (mbmi->filter_intra_mode_info.use_filter_intra) { - const FILTER_INTRA_MODE mode = - mbmi->filter_intra_mode_info.filter_intra_mode; - aom_write_symbol(w, mode, xd->tile_ctx->filter_intra_mode_cdf, - FILTER_INTRA_MODES); - } - } -} - -static void write_angle_delta(aom_writer *w, int angle_delta, - aom_cdf_prob *cdf) { - aom_write_symbol(w, angle_delta + MAX_ANGLE_DELTA, cdf, - 2 * MAX_ANGLE_DELTA + 1); -} - -static void write_mb_interp_filter(AV1_COMP *cpi, const MACROBLOCKD *xd, - aom_writer *w) { - AV1_COMMON *const cm = &cpi->common; - const MB_MODE_INFO *const mbmi = xd->mi[0]; - FRAME_CONTEXT *ec_ctx = xd->tile_ctx; - - if (!av1_is_interp_needed(xd)) { - assert(mbmi->interp_filters == - av1_broadcast_interp_filter( - av1_unswitchable_filter(cm->interp_filter))); - return; - } - if 
(cm->interp_filter == SWITCHABLE) { - int dir; - for (dir = 0; dir < 2; ++dir) { - const int ctx = av1_get_pred_context_switchable_interp(xd, dir); - InterpFilter filter = - av1_extract_interp_filter(mbmi->interp_filters, dir); - aom_write_symbol(w, filter, ec_ctx->switchable_interp_cdf[ctx], - SWITCHABLE_FILTERS); - ++cpi->interp_filter_selected[0][filter]; - if (cm->seq_params.enable_dual_filter == 0) return; - } - } -} - -// Transmit color values with delta encoding. Write the first value as -// literal, and the deltas between each value and the previous one. "min_val" is -// the smallest possible value of the deltas. -static void delta_encode_palette_colors(const int *colors, int num, - int bit_depth, int min_val, - aom_writer *w) { - if (num <= 0) return; - assert(colors[0] < (1 << bit_depth)); - aom_write_literal(w, colors[0], bit_depth); - if (num == 1) return; - int max_delta = 0; - int deltas[PALETTE_MAX_SIZE]; - memset(deltas, 0, sizeof(deltas)); - for (int i = 1; i < num; ++i) { - assert(colors[i] < (1 << bit_depth)); - const int delta = colors[i] - colors[i - 1]; - deltas[i - 1] = delta; - assert(delta >= min_val); - if (delta > max_delta) max_delta = delta; - } - const int min_bits = bit_depth - 3; - int bits = AOMMAX(av1_ceil_log2(max_delta + 1 - min_val), min_bits); - assert(bits <= bit_depth); - int range = (1 << bit_depth) - colors[0] - min_val; - aom_write_literal(w, bits - min_bits, 2); - for (int i = 0; i < num - 1; ++i) { - aom_write_literal(w, deltas[i] - min_val, bits); - range -= deltas[i]; - bits = AOMMIN(bits, av1_ceil_log2(range)); - } -} - -// Transmit luma palette color values. First signal if each color in the color -// cache is used. Those colors that are not in the cache are transmitted with -// delta encoding. 
-static void write_palette_colors_y(const MACROBLOCKD *const xd, - const PALETTE_MODE_INFO *const pmi, - int bit_depth, aom_writer *w) { - const int n = pmi->palette_size[0]; - uint16_t color_cache[2 * PALETTE_MAX_SIZE]; - const int n_cache = av1_get_palette_cache(xd, 0, color_cache); - int out_cache_colors[PALETTE_MAX_SIZE]; - uint8_t cache_color_found[2 * PALETTE_MAX_SIZE]; - const int n_out_cache = - av1_index_color_cache(color_cache, n_cache, pmi->palette_colors, n, - cache_color_found, out_cache_colors); - int n_in_cache = 0; - for (int i = 0; i < n_cache && n_in_cache < n; ++i) { - const int found = cache_color_found[i]; - aom_write_bit(w, found); - n_in_cache += found; - } - assert(n_in_cache + n_out_cache == n); - delta_encode_palette_colors(out_cache_colors, n_out_cache, bit_depth, 1, w); -} - -// Write chroma palette color values. U channel is handled similarly to the luma -// channel. For v channel, either use delta encoding or transmit raw values -// directly, whichever costs less. -static void write_palette_colors_uv(const MACROBLOCKD *const xd, - const PALETTE_MODE_INFO *const pmi, - int bit_depth, aom_writer *w) { - const int n = pmi->palette_size[1]; - const uint16_t *colors_u = pmi->palette_colors + PALETTE_MAX_SIZE; - const uint16_t *colors_v = pmi->palette_colors + 2 * PALETTE_MAX_SIZE; - // U channel colors. - uint16_t color_cache[2 * PALETTE_MAX_SIZE]; - const int n_cache = av1_get_palette_cache(xd, 1, color_cache); - int out_cache_colors[PALETTE_MAX_SIZE]; - uint8_t cache_color_found[2 * PALETTE_MAX_SIZE]; - const int n_out_cache = av1_index_color_cache( - color_cache, n_cache, colors_u, n, cache_color_found, out_cache_colors); - int n_in_cache = 0; - for (int i = 0; i < n_cache && n_in_cache < n; ++i) { - const int found = cache_color_found[i]; - aom_write_bit(w, found); - n_in_cache += found; - } - delta_encode_palette_colors(out_cache_colors, n_out_cache, bit_depth, 0, w); - - // V channel colors. 
Don't use color cache as the colors are not sorted. - const int max_val = 1 << bit_depth; - int zero_count = 0, min_bits_v = 0; - int bits_v = - av1_get_palette_delta_bits_v(pmi, bit_depth, &zero_count, &min_bits_v); - const int rate_using_delta = - 2 + bit_depth + (bits_v + 1) * (n - 1) - zero_count; - const int rate_using_raw = bit_depth * n; - if (rate_using_delta < rate_using_raw) { // delta encoding - assert(colors_v[0] < (1 << bit_depth)); - aom_write_bit(w, 1); - aom_write_literal(w, bits_v - min_bits_v, 2); - aom_write_literal(w, colors_v[0], bit_depth); - for (int i = 1; i < n; ++i) { - assert(colors_v[i] < (1 << bit_depth)); - if (colors_v[i] == colors_v[i - 1]) { // No need to signal sign bit. - aom_write_literal(w, 0, bits_v); - continue; - } - const int delta = abs((int)colors_v[i] - colors_v[i - 1]); - const int sign_bit = colors_v[i] < colors_v[i - 1]; - if (delta <= max_val - delta) { - aom_write_literal(w, delta, bits_v); - aom_write_bit(w, sign_bit); - } else { - aom_write_literal(w, max_val - delta, bits_v); - aom_write_bit(w, !sign_bit); - } - } - } else { // Transmit raw values. 
- aom_write_bit(w, 0); - for (int i = 0; i < n; ++i) { - assert(colors_v[i] < (1 << bit_depth)); - aom_write_literal(w, colors_v[i], bit_depth); - } - } -} - -static void write_palette_mode_info(const AV1_COMMON *cm, const MACROBLOCKD *xd, - const MB_MODE_INFO *const mbmi, int mi_row, - int mi_col, aom_writer *w) { - const int num_planes = av1_num_planes(cm); - const BLOCK_SIZE bsize = mbmi->sb_type; - assert(av1_allow_palette(cm->allow_screen_content_tools, bsize)); - const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; - const int bsize_ctx = av1_get_palette_bsize_ctx(bsize); - - if (mbmi->mode == DC_PRED) { - const int n = pmi->palette_size[0]; - const int palette_y_mode_ctx = av1_get_palette_mode_ctx(xd); - aom_write_symbol( - w, n > 0, - xd->tile_ctx->palette_y_mode_cdf[bsize_ctx][palette_y_mode_ctx], 2); - if (n > 0) { - aom_write_symbol(w, n - PALETTE_MIN_SIZE, - xd->tile_ctx->palette_y_size_cdf[bsize_ctx], - PALETTE_SIZES); - write_palette_colors_y(xd, pmi, cm->seq_params.bit_depth, w); - } - } - - const int uv_dc_pred = - num_planes > 1 && mbmi->uv_mode == UV_DC_PRED && - is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x, - xd->plane[1].subsampling_y); - if (uv_dc_pred) { - const int n = pmi->palette_size[1]; - const int palette_uv_mode_ctx = (pmi->palette_size[0] > 0); - aom_write_symbol(w, n > 0, - xd->tile_ctx->palette_uv_mode_cdf[palette_uv_mode_ctx], 2); - if (n > 0) { - aom_write_symbol(w, n - PALETTE_MIN_SIZE, - xd->tile_ctx->palette_uv_size_cdf[bsize_ctx], - PALETTE_SIZES); - write_palette_colors_uv(xd, pmi, cm->seq_params.bit_depth, w); - } - } -} - -void av1_write_tx_type(const AV1_COMMON *const cm, const MACROBLOCKD *xd, - int blk_row, int blk_col, int plane, TX_SIZE tx_size, - aom_writer *w) { - MB_MODE_INFO *mbmi = xd->mi[0]; - const int is_inter = is_inter_block(mbmi); - FRAME_CONTEXT *ec_ctx = xd->tile_ctx; - - // Only y plane's tx_type is transmitted - if (plane > 0) return; - PLANE_TYPE plane_type = 
get_plane_type(plane); - TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col, tx_size, - cm->reduced_tx_set_used); - - const TX_SIZE square_tx_size = txsize_sqr_map[tx_size]; - if (get_ext_tx_types(tx_size, is_inter, cm->reduced_tx_set_used) > 1 && - ((!cm->seg.enabled && cm->base_qindex > 0) || - (cm->seg.enabled && xd->qindex[mbmi->segment_id] > 0)) && - !mbmi->skip && - !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { - const TxSetType tx_set_type = - av1_get_ext_tx_set_type(tx_size, is_inter, cm->reduced_tx_set_used); - const int eset = get_ext_tx_set(tx_size, is_inter, cm->reduced_tx_set_used); - // eset == 0 should correspond to a set with only DCT_DCT and there - // is no need to send the tx_type - assert(eset > 0); - assert(av1_ext_tx_used[tx_set_type][tx_type]); - if (is_inter) { - aom_write_symbol(w, av1_ext_tx_ind[tx_set_type][tx_type], - ec_ctx->inter_ext_tx_cdf[eset][square_tx_size], - av1_num_ext_tx_set[tx_set_type]); - } else { - PREDICTION_MODE intra_dir; - if (mbmi->filter_intra_mode_info.use_filter_intra) - intra_dir = - fimode_to_intradir[mbmi->filter_intra_mode_info.filter_intra_mode]; - else - intra_dir = mbmi->mode; - aom_write_symbol( - w, av1_ext_tx_ind[tx_set_type][tx_type], - ec_ctx->intra_ext_tx_cdf[eset][square_tx_size][intra_dir], - av1_num_ext_tx_set[tx_set_type]); - } - } -} - -static void write_intra_y_mode_nonkf(FRAME_CONTEXT *frame_ctx, BLOCK_SIZE bsize, - PREDICTION_MODE mode, aom_writer *w) { - aom_write_symbol(w, mode, frame_ctx->y_mode_cdf[size_group_lookup[bsize]], - INTRA_MODES); -} - -static void write_intra_uv_mode(FRAME_CONTEXT *frame_ctx, - UV_PREDICTION_MODE uv_mode, - PREDICTION_MODE y_mode, - CFL_ALLOWED_TYPE cfl_allowed, aom_writer *w) { - aom_write_symbol(w, uv_mode, frame_ctx->uv_mode_cdf[cfl_allowed][y_mode], - UV_INTRA_MODES - !cfl_allowed); -} - -static void write_cfl_alphas(FRAME_CONTEXT *const ec_ctx, int idx, - int joint_sign, aom_writer *w) { - aom_write_symbol(w, joint_sign, 
ec_ctx->cfl_sign_cdf, CFL_JOINT_SIGNS); - // Magnitudes are only signaled for nonzero codes. - if (CFL_SIGN_U(joint_sign) != CFL_SIGN_ZERO) { - aom_cdf_prob *cdf_u = ec_ctx->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)]; - aom_write_symbol(w, CFL_IDX_U(idx), cdf_u, CFL_ALPHABET_SIZE); - } - if (CFL_SIGN_V(joint_sign) != CFL_SIGN_ZERO) { - aom_cdf_prob *cdf_v = ec_ctx->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)]; - aom_write_symbol(w, CFL_IDX_V(idx), cdf_v, CFL_ALPHABET_SIZE); - } -} - -static void write_cdef(AV1_COMMON *cm, MACROBLOCKD *const xd, aom_writer *w, - int skip, int mi_col, int mi_row) { - if (cm->coded_lossless || cm->allow_intrabc) { - // Initialize to indicate no CDEF for safety. - cm->cdef_bits = 0; - cm->cdef_strengths[0] = 0; - cm->nb_cdef_strengths = 1; - cm->cdef_uv_strengths[0] = 0; - return; - } - - const int m = ~((1 << (6 - MI_SIZE_LOG2)) - 1); - const MB_MODE_INFO *mbmi = - cm->mi_grid_visible[(mi_row & m) * cm->mi_stride + (mi_col & m)]; - // Initialise when at top left part of the superblock - if (!(mi_row & (cm->seq_params.mib_size - 1)) && - !(mi_col & (cm->seq_params.mib_size - 1))) { // Top left? - xd->cdef_preset[0] = xd->cdef_preset[1] = xd->cdef_preset[2] = - xd->cdef_preset[3] = -1; - } - - // Emit CDEF param at first non-skip coding block - const int mask = 1 << (6 - MI_SIZE_LOG2); - const int index = cm->seq_params.sb_size == BLOCK_128X128 - ? 
!!(mi_col & mask) + 2 * !!(mi_row & mask) - : 0; - if (xd->cdef_preset[index] == -1 && !skip) { - aom_write_literal(w, mbmi->cdef_strength, cm->cdef_bits); - xd->cdef_preset[index] = mbmi->cdef_strength; - } -} - -static void write_inter_segment_id(AV1_COMP *cpi, aom_writer *w, - const struct segmentation *const seg, - struct segmentation_probs *const segp, - int mi_row, int mi_col, int skip, - int preskip) { - MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; - const MB_MODE_INFO *const mbmi = xd->mi[0]; - AV1_COMMON *const cm = &cpi->common; - - if (seg->update_map) { - if (preskip) { - if (!seg->segid_preskip) return; - } else { - if (seg->segid_preskip) return; - if (skip) { - write_segment_id(cpi, mbmi, w, seg, segp, mi_row, mi_col, 1); - if (seg->temporal_update) ((MB_MODE_INFO *)mbmi)->seg_id_predicted = 0; - return; - } - } - if (seg->temporal_update) { - const int pred_flag = mbmi->seg_id_predicted; - aom_cdf_prob *pred_cdf = av1_get_pred_cdf_seg_id(segp, xd); - aom_write_symbol(w, pred_flag, pred_cdf, 2); - if (!pred_flag) { - write_segment_id(cpi, mbmi, w, seg, segp, mi_row, mi_col, 0); - } - if (pred_flag) { - set_spatial_segment_id(cm, cm->current_frame_seg_map, mbmi->sb_type, - mi_row, mi_col, mbmi->segment_id); - } - } else { - write_segment_id(cpi, mbmi, w, seg, segp, mi_row, mi_col, 0); - } - } -} - -// If delta q is present, writes delta_q index. -// Also writes delta_q loop filter levels, if present. 
-static void write_delta_q_params(AV1_COMP *cpi, const int mi_row, - const int mi_col, int skip, aom_writer *w) { - AV1_COMMON *const cm = &cpi->common; - if (cm->delta_q_present_flag) { - MACROBLOCK *const x = &cpi->td.mb; - MACROBLOCKD *const xd = &x->e_mbd; - const MB_MODE_INFO *const mbmi = xd->mi[0]; - const BLOCK_SIZE bsize = mbmi->sb_type; - const int super_block_upper_left = - ((mi_row & (cm->seq_params.mib_size - 1)) == 0) && - ((mi_col & (cm->seq_params.mib_size - 1)) == 0); - - if ((bsize != cm->seq_params.sb_size || skip == 0) && - super_block_upper_left) { - assert(mbmi->current_qindex > 0); - const int reduced_delta_qindex = - (mbmi->current_qindex - xd->current_qindex) / cm->delta_q_res; - write_delta_qindex(xd, reduced_delta_qindex, w); - xd->current_qindex = mbmi->current_qindex; - if (cm->delta_lf_present_flag) { - if (cm->delta_lf_multi) { - const int frame_lf_count = - av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2; - for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) { - int reduced_delta_lflevel = - (mbmi->delta_lf[lf_id] - xd->delta_lf[lf_id]) / - cm->delta_lf_res; - write_delta_lflevel(cm, xd, lf_id, reduced_delta_lflevel, w); - xd->delta_lf[lf_id] = mbmi->delta_lf[lf_id]; - } - } else { - int reduced_delta_lflevel = - (mbmi->delta_lf_from_base - xd->delta_lf_from_base) / - cm->delta_lf_res; - write_delta_lflevel(cm, xd, -1, reduced_delta_lflevel, w); - xd->delta_lf_from_base = mbmi->delta_lf_from_base; - } - } - } - } -} - -static void write_intra_prediction_modes(AV1_COMP *cpi, const int mi_row, - const int mi_col, int is_keyframe, - aom_writer *w) { - const AV1_COMMON *const cm = &cpi->common; - MACROBLOCK *const x = &cpi->td.mb; - MACROBLOCKD *const xd = &x->e_mbd; - FRAME_CONTEXT *ec_ctx = xd->tile_ctx; - const MB_MODE_INFO *const mbmi = xd->mi[0]; - const PREDICTION_MODE mode = mbmi->mode; - const BLOCK_SIZE bsize = mbmi->sb_type; - - // Y mode. 
- if (is_keyframe) { - const MB_MODE_INFO *const above_mi = xd->above_mbmi; - const MB_MODE_INFO *const left_mi = xd->left_mbmi; - write_intra_y_mode_kf(ec_ctx, mbmi, above_mi, left_mi, mode, w); - } else { - write_intra_y_mode_nonkf(ec_ctx, bsize, mode, w); - } - - // Y angle delta. - const int use_angle_delta = av1_use_angle_delta(bsize); - if (use_angle_delta && av1_is_directional_mode(mode)) { - write_angle_delta(w, mbmi->angle_delta[PLANE_TYPE_Y], - ec_ctx->angle_delta_cdf[mode - V_PRED]); - } - - // UV mode and UV angle delta. - if (!cm->seq_params.monochrome && - is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x, - xd->plane[1].subsampling_y)) { - const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode; - write_intra_uv_mode(ec_ctx, uv_mode, mode, is_cfl_allowed(xd), w); - if (uv_mode == UV_CFL_PRED) - write_cfl_alphas(ec_ctx, mbmi->cfl_alpha_idx, mbmi->cfl_alpha_signs, w); - if (use_angle_delta && av1_is_directional_mode(get_uv_mode(uv_mode))) { - write_angle_delta(w, mbmi->angle_delta[PLANE_TYPE_UV], - ec_ctx->angle_delta_cdf[uv_mode - V_PRED]); - } - } - - // Palette. - if (av1_allow_palette(cm->allow_screen_content_tools, bsize)) { - write_palette_mode_info(cm, xd, mbmi, mi_row, mi_col, w); - } - - // Filter intra. 
- write_filter_intra_mode_info(cm, xd, mbmi, w); -} - -static void pack_inter_mode_mvs(AV1_COMP *cpi, const int mi_row, - const int mi_col, aom_writer *w) { - AV1_COMMON *const cm = &cpi->common; - MACROBLOCK *const x = &cpi->td.mb; - MACROBLOCKD *const xd = &x->e_mbd; - FRAME_CONTEXT *ec_ctx = xd->tile_ctx; - const struct segmentation *const seg = &cm->seg; - struct segmentation_probs *const segp = &ec_ctx->seg; - const MB_MODE_INFO *const mbmi = xd->mi[0]; - const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; - const PREDICTION_MODE mode = mbmi->mode; - const int segment_id = mbmi->segment_id; - const BLOCK_SIZE bsize = mbmi->sb_type; - const int allow_hp = cm->allow_high_precision_mv; - const int is_inter = is_inter_block(mbmi); - const int is_compound = has_second_ref(mbmi); - int ref; - - write_inter_segment_id(cpi, w, seg, segp, mi_row, mi_col, 0, 1); - - write_skip_mode(cm, xd, segment_id, mbmi, w); - - assert(IMPLIES(mbmi->skip_mode, mbmi->skip)); - const int skip = - mbmi->skip_mode ? 1 : write_skip(cm, xd, segment_id, mbmi, w); - - write_inter_segment_id(cpi, w, seg, segp, mi_row, mi_col, skip, 0); - - write_cdef(cm, xd, w, skip, mi_col, mi_row); - - write_delta_q_params(cpi, mi_row, mi_col, skip, w); - - if (!mbmi->skip_mode) write_is_inter(cm, xd, mbmi->segment_id, w, is_inter); - - if (mbmi->skip_mode) return; - - if (!is_inter) { - write_intra_prediction_modes(cpi, mi_row, mi_col, 0, w); - } else { - int16_t mode_ctx; - - av1_collect_neighbors_ref_counts(xd); - - write_ref_frames(cm, xd, w); - - mode_ctx = - av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame); - - // If segment skip is not enabled code the mode. 
- if (!segfeature_active(seg, segment_id, SEG_LVL_SKIP)) { - if (is_inter_compound_mode(mode)) - write_inter_compound_mode(xd, w, mode, mode_ctx); - else if (is_inter_singleref_mode(mode)) - write_inter_mode(w, mode, ec_ctx, mode_ctx); - - if (mode == NEWMV || mode == NEW_NEWMV || have_nearmv_in_inter_mode(mode)) - write_drl_idx(ec_ctx, mbmi, mbmi_ext, w); - else - assert(mbmi->ref_mv_idx == 0); - } - - if (mode == NEWMV || mode == NEW_NEWMV) { - for (ref = 0; ref < 1 + is_compound; ++ref) { - nmv_context *nmvc = &ec_ctx->nmvc; - const int_mv ref_mv = av1_get_ref_mv(x, ref); - av1_encode_mv(cpi, w, &mbmi->mv[ref].as_mv, &ref_mv.as_mv, nmvc, - allow_hp); - } - } else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) { - nmv_context *nmvc = &ec_ctx->nmvc; - const int_mv ref_mv = av1_get_ref_mv(x, 1); - av1_encode_mv(cpi, w, &mbmi->mv[1].as_mv, &ref_mv.as_mv, nmvc, allow_hp); - } else if (mode == NEW_NEARESTMV || mode == NEW_NEARMV) { - nmv_context *nmvc = &ec_ctx->nmvc; - const int_mv ref_mv = av1_get_ref_mv(x, 0); - av1_encode_mv(cpi, w, &mbmi->mv[0].as_mv, &ref_mv.as_mv, nmvc, allow_hp); - } - - if (cpi->common.reference_mode != COMPOUND_REFERENCE && - cpi->common.seq_params.enable_interintra_compound && - is_interintra_allowed(mbmi)) { - const int interintra = mbmi->ref_frame[1] == INTRA_FRAME; - const int bsize_group = size_group_lookup[bsize]; - aom_write_symbol(w, interintra, ec_ctx->interintra_cdf[bsize_group], 2); - if (interintra) { - aom_write_symbol(w, mbmi->interintra_mode, - ec_ctx->interintra_mode_cdf[bsize_group], - INTERINTRA_MODES); - if (is_interintra_wedge_used(bsize)) { - aom_write_symbol(w, mbmi->use_wedge_interintra, - ec_ctx->wedge_interintra_cdf[bsize], 2); - if (mbmi->use_wedge_interintra) { - aom_write_symbol(w, mbmi->interintra_wedge_index, - ec_ctx->wedge_idx_cdf[bsize], 16); - assert(mbmi->interintra_wedge_sign == 0); - } - } - } - } - - if (mbmi->ref_frame[1] != INTRA_FRAME) write_motion_mode(cm, xd, mbmi, w); - - // First write idx to 
indicate current compound inter prediction mode group - // Group A (0): jnt_comp, compound_average - // Group B (1): interintra, compound_diffwtd, wedge - if (has_second_ref(mbmi)) { - const int masked_compound_used = is_any_masked_compound_used(bsize) && - cm->seq_params.enable_masked_compound; - - if (masked_compound_used) { - const int ctx_comp_group_idx = get_comp_group_idx_context(xd); - aom_write_symbol(w, mbmi->comp_group_idx, - ec_ctx->comp_group_idx_cdf[ctx_comp_group_idx], 2); - } else { - assert(mbmi->comp_group_idx == 0); - } - - if (mbmi->comp_group_idx == 0) { - if (mbmi->compound_idx) - assert(mbmi->interinter_comp.type == COMPOUND_AVERAGE); - - if (cm->seq_params.enable_jnt_comp) { - const int comp_index_ctx = get_comp_index_context(cm, xd); - aom_write_symbol(w, mbmi->compound_idx, - ec_ctx->compound_index_cdf[comp_index_ctx], 2); - } else { - assert(mbmi->compound_idx == 1); - } - } else { - assert(cpi->common.reference_mode != SINGLE_REFERENCE && - is_inter_compound_mode(mbmi->mode) && - mbmi->motion_mode == SIMPLE_TRANSLATION); - assert(masked_compound_used); - // compound_diffwtd, wedge - assert(mbmi->interinter_comp.type == COMPOUND_WEDGE || - mbmi->interinter_comp.type == COMPOUND_DIFFWTD); - - if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) - aom_write_symbol(w, mbmi->interinter_comp.type - 1, - ec_ctx->compound_type_cdf[bsize], - COMPOUND_TYPES - 1); - - if (mbmi->interinter_comp.type == COMPOUND_WEDGE) { - assert(is_interinter_compound_used(COMPOUND_WEDGE, bsize)); - aom_write_symbol(w, mbmi->interinter_comp.wedge_index, - ec_ctx->wedge_idx_cdf[bsize], 16); - aom_write_bit(w, mbmi->interinter_comp.wedge_sign); - } else { - assert(mbmi->interinter_comp.type == COMPOUND_DIFFWTD); - aom_write_literal(w, mbmi->interinter_comp.mask_type, - MAX_DIFFWTD_MASK_BITS); - } - } - } - - write_mb_interp_filter(cpi, xd, w); - } -} - -static void write_intrabc_info(MACROBLOCKD *xd, - const MB_MODE_INFO_EXT *mbmi_ext, - aom_writer *w) { - const 
MB_MODE_INFO *const mbmi = xd->mi[0]; - int use_intrabc = is_intrabc_block(mbmi); - FRAME_CONTEXT *ec_ctx = xd->tile_ctx; - aom_write_symbol(w, use_intrabc, ec_ctx->intrabc_cdf, 2); - if (use_intrabc) { - assert(mbmi->mode == DC_PRED); - assert(mbmi->uv_mode == UV_DC_PRED); - assert(mbmi->motion_mode == SIMPLE_TRANSLATION); - int_mv dv_ref = mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv; - av1_encode_dv(w, &mbmi->mv[0].as_mv, &dv_ref.as_mv, &ec_ctx->ndvc); - } -} - -static void write_mb_modes_kf(AV1_COMP *cpi, MACROBLOCKD *xd, - const MB_MODE_INFO_EXT *mbmi_ext, - const int mi_row, const int mi_col, - aom_writer *w) { - AV1_COMMON *const cm = &cpi->common; - FRAME_CONTEXT *ec_ctx = xd->tile_ctx; - const struct segmentation *const seg = &cm->seg; - struct segmentation_probs *const segp = &ec_ctx->seg; - const MB_MODE_INFO *const mbmi = xd->mi[0]; - - if (seg->segid_preskip && seg->update_map) - write_segment_id(cpi, mbmi, w, seg, segp, mi_row, mi_col, 0); - - const int skip = write_skip(cm, xd, mbmi->segment_id, mbmi, w); - - if (!seg->segid_preskip && seg->update_map) - write_segment_id(cpi, mbmi, w, seg, segp, mi_row, mi_col, skip); - - write_cdef(cm, xd, w, skip, mi_col, mi_row); - - write_delta_q_params(cpi, mi_row, mi_col, skip, w); - - if (av1_allow_intrabc(cm)) { - write_intrabc_info(xd, mbmi_ext, w); - if (is_intrabc_block(mbmi)) return; - } - - write_intra_prediction_modes(cpi, mi_row, mi_col, 1, w); -} - -#if CONFIG_RD_DEBUG -static void dump_mode_info(MODE_INFO *mi) { - printf("\nmi->mi_row == %d\n", mi->mi_row); - printf("&& mi->mi_col == %d\n", mi->mi_col); - printf("&& mi->sb_type == %d\n", mi->sb_type); - printf("&& mi->tx_size == %d\n", mi->tx_size); - printf("&& mi->mode == %d\n", mi->mode); -} -static int rd_token_stats_mismatch(RD_STATS *rd_stats, TOKEN_STATS *token_stats, - int plane) { - if (rd_stats->txb_coeff_cost[plane] != token_stats->cost) { - int r, c; - printf("\nplane %d rd_stats->txb_coeff_cost %d token_stats->cost %d\n", - plane, 
rd_stats->txb_coeff_cost[plane], token_stats->cost); - printf("rd txb_coeff_cost_map\n"); - for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r) { - for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c) { - printf("%d ", rd_stats->txb_coeff_cost_map[plane][r][c]); - } - printf("\n"); - } - - printf("pack txb_coeff_cost_map\n"); - for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r) { - for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c) { - printf("%d ", token_stats->txb_coeff_cost_map[r][c]); - } - printf("\n"); - } - return 1; - } - return 0; -} -#endif - -#if ENC_MISMATCH_DEBUG -static void enc_dump_logs(AV1_COMP *cpi, int mi_row, int mi_col) { - AV1_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; - xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col); - const MB_MODE_INFO *const *mbmi = xd->mi[0]; - if (is_inter_block(mbmi)) { -#define FRAME_TO_CHECK 11 - if (cm->current_video_frame == FRAME_TO_CHECK && cm->show_frame == 1) { - const BLOCK_SIZE bsize = mbmi->sb_type; - - int_mv mv[2]; - int is_comp_ref = has_second_ref(mbmi); - int ref; - - for (ref = 0; ref < 1 + is_comp_ref; ++ref) - mv[ref].as_mv = mbmi->mv[ref].as_mv; - - if (!is_comp_ref) { - mv[1].as_int = 0; - } - - MACROBLOCK *const x = &cpi->td.mb; - const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; - const int16_t mode_ctx = - is_comp_ref ? 
mbmi_ext->compound_mode_context[mbmi->ref_frame[0]] - : av1_mode_context_analyzer(mbmi_ext->mode_context, - mbmi->ref_frame); - - const int16_t newmv_ctx = mode_ctx & NEWMV_CTX_MASK; - int16_t zeromv_ctx = -1; - int16_t refmv_ctx = -1; - - if (mbmi->mode != NEWMV) { - zeromv_ctx = (mode_ctx >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK; - if (mbmi->mode != GLOBALMV) - refmv_ctx = (mode_ctx >> REFMV_OFFSET) & REFMV_CTX_MASK; - } - - printf( - "=== ENCODER ===: " - "Frame=%d, (mi_row,mi_col)=(%d,%d), skip_mode=%d, mode=%d, bsize=%d, " - "show_frame=%d, mv[0]=(%d,%d), mv[1]=(%d,%d), ref[0]=%d, " - "ref[1]=%d, motion_mode=%d, mode_ctx=%d, " - "newmv_ctx=%d, zeromv_ctx=%d, refmv_ctx=%d, tx_size=%d\n", - cm->current_video_frame, mi_row, mi_col, mbmi->skip_mode, mbmi->mode, - bsize, cm->show_frame, mv[0].as_mv.row, mv[0].as_mv.col, - mv[1].as_mv.row, mv[1].as_mv.col, mbmi->ref_frame[0], - mbmi->ref_frame[1], mbmi->motion_mode, mode_ctx, newmv_ctx, - zeromv_ctx, refmv_ctx, mbmi->tx_size); - } - } -} -#endif // ENC_MISMATCH_DEBUG - -static void write_mbmi_b(AV1_COMP *cpi, const TileInfo *const tile, - aom_writer *w, int mi_row, int mi_col) { - AV1_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; - int bh, bw; - xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col); - MB_MODE_INFO *m = xd->mi[0]; - - assert(m->sb_type <= cm->seq_params.sb_size || - (m->sb_type >= BLOCK_SIZES && m->sb_type < BLOCK_SIZES_ALL)); - - bh = mi_size_high[m->sb_type]; - bw = mi_size_wide[m->sb_type]; - - cpi->td.mb.mbmi_ext = cpi->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col); - - set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); - - xd->above_txfm_context = cm->above_txfm_context[tile->tile_row] + mi_col; - xd->left_txfm_context = - xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); - - if (frame_is_intra_only(cm)) { - write_mb_modes_kf(cpi, xd, cpi->td.mb.mbmi_ext, mi_row, mi_col, w); - } else { - // has_subpel_mv_component needs 
the ref frame buffers set up to look - // up if they are scaled. has_subpel_mv_component is in turn needed by - // write_switchable_interp_filter, which is called by pack_inter_mode_mvs. - set_ref_ptrs(cm, xd, m->ref_frame[0], m->ref_frame[1]); - -#if ENC_MISMATCH_DEBUG - enc_dump_logs(cpi, mi_row, mi_col); -#endif // ENC_MISMATCH_DEBUG - - pack_inter_mode_mvs(cpi, mi_row, mi_col, w); - } -} - -static void write_inter_txb_coeff(AV1_COMMON *const cm, MACROBLOCK *const x, - MB_MODE_INFO *const mbmi, aom_writer *w, - const TOKENEXTRA **tok, - const TOKENEXTRA *const tok_end, - TOKEN_STATS *token_stats, const int row, - const int col, int *block, const int plane) { - MACROBLOCKD *const xd = &x->e_mbd; - const struct macroblockd_plane *const pd = &xd->plane[plane]; - const BLOCK_SIZE bsize = mbmi->sb_type; - const BLOCK_SIZE bsizec = - scale_chroma_bsize(bsize, pd->subsampling_x, pd->subsampling_y); - - const BLOCK_SIZE plane_bsize = - get_plane_block_size(bsizec, pd->subsampling_x, pd->subsampling_y); - - const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, plane); - const int step = - tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size]; - const int bkw = tx_size_wide_unit[max_tx_size]; - const int bkh = tx_size_high_unit[max_tx_size]; - - const BLOCK_SIZE max_unit_bsize = - get_plane_block_size(BLOCK_64X64, pd->subsampling_x, pd->subsampling_y); - int mu_blocks_wide = block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0]; - int mu_blocks_high = block_size_high[max_unit_bsize] >> tx_size_high_log2[0]; - - int blk_row, blk_col; - - const int num_4x4_w = block_size_wide[plane_bsize] >> tx_size_wide_log2[0]; - const int num_4x4_h = block_size_high[plane_bsize] >> tx_size_high_log2[0]; - - const int unit_height = - AOMMIN(mu_blocks_high + (row >> pd->subsampling_y), num_4x4_h); - const int unit_width = - AOMMIN(mu_blocks_wide + (col >> pd->subsampling_x), num_4x4_w); - for (blk_row = row >> pd->subsampling_y; blk_row < unit_height; - blk_row += 
bkh) { - for (blk_col = col >> pd->subsampling_x; blk_col < unit_width; - blk_col += bkw) { - pack_txb_tokens(w, cm, x, tok, tok_end, xd, mbmi, plane, plane_bsize, - cm->seq_params.bit_depth, *block, blk_row, blk_col, - max_tx_size, token_stats); - *block += step; - } - } -} - -static void write_tokens_b(AV1_COMP *cpi, const TileInfo *const tile, - aom_writer *w, const TOKENEXTRA **tok, - const TOKENEXTRA *const tok_end, int mi_row, - int mi_col) { - AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; - const int mi_offset = mi_row * cm->mi_stride + mi_col; - MB_MODE_INFO *const mbmi = *(cm->mi_grid_visible + mi_offset); - int plane; - int bh, bw; - MACROBLOCK *const x = &cpi->td.mb; - (void)tok; - (void)tok_end; - xd->mi = cm->mi_grid_visible + mi_offset; - - assert(mbmi->sb_type <= cm->seq_params.sb_size || - (mbmi->sb_type >= BLOCK_SIZES && mbmi->sb_type < BLOCK_SIZES_ALL)); - - bh = mi_size_high[mbmi->sb_type]; - bw = mi_size_wide[mbmi->sb_type]; - cpi->td.mb.mbmi_ext = cpi->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col); - - set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); - - if (!mbmi->skip) { - if (!is_inter_block(mbmi)) - av1_write_coeffs_mb(cm, x, mi_row, mi_col, w, mbmi->sb_type); - - if (is_inter_block(mbmi)) { - int block[MAX_MB_PLANE] = { 0 }; - const BLOCK_SIZE plane_bsize = mbmi->sb_type; - assert(plane_bsize == get_plane_block_size(mbmi->sb_type, - xd->plane[0].subsampling_x, - xd->plane[0].subsampling_y)); - const int num_4x4_w = - block_size_wide[plane_bsize] >> tx_size_wide_log2[0]; - const int num_4x4_h = - block_size_high[plane_bsize] >> tx_size_high_log2[0]; - int row, col; - TOKEN_STATS token_stats; - init_token_stats(&token_stats); - - const BLOCK_SIZE max_unit_bsize = BLOCK_64X64; - assert(max_unit_bsize == - get_plane_block_size(BLOCK_64X64, xd->plane[0].subsampling_x, - xd->plane[0].subsampling_y)); - int mu_blocks_wide = - 
block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0]; - int mu_blocks_high = - block_size_high[max_unit_bsize] >> tx_size_high_log2[0]; - - mu_blocks_wide = AOMMIN(num_4x4_w, mu_blocks_wide); - mu_blocks_high = AOMMIN(num_4x4_h, mu_blocks_high); - - for (row = 0; row < num_4x4_h; row += mu_blocks_high) { - for (col = 0; col < num_4x4_w; col += mu_blocks_wide) { - for (plane = 0; plane < num_planes && is_inter_block(mbmi); ++plane) { - const struct macroblockd_plane *const pd = &xd->plane[plane]; - if (!is_chroma_reference(mi_row, mi_col, mbmi->sb_type, - pd->subsampling_x, pd->subsampling_y)) { - continue; - } - write_inter_txb_coeff(cm, x, mbmi, w, tok, tok_end, &token_stats, - row, col, &block[plane], plane); - } - } -#if CONFIG_RD_DEBUG - if (mbmi->sb_type >= BLOCK_8X8 && - rd_token_stats_mismatch(&mbmi->rd_stats, &token_stats, plane)) { - dump_mode_info(m); - assert(0); - } -#endif // CONFIG_RD_DEBUG - } - } - } -} - -static void write_modes_b(AV1_COMP *cpi, const TileInfo *const tile, - aom_writer *w, const TOKENEXTRA **tok, - const TOKENEXTRA *const tok_end, int mi_row, - int mi_col) { - write_mbmi_b(cpi, tile, w, mi_row, mi_col); - - AV1_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &cpi->td.mb.e_mbd; - MB_MODE_INFO *mbmi = xd->mi[0]; - for (int plane = 0; plane < AOMMIN(2, av1_num_planes(cm)); ++plane) { - const uint8_t palette_size_plane = - mbmi->palette_mode_info.palette_size[plane]; - assert(!mbmi->skip_mode || !palette_size_plane); - if (palette_size_plane > 0) { - assert(mbmi->use_intrabc == 0); - assert(av1_allow_palette(cm->allow_screen_content_tools, mbmi->sb_type)); - int rows, cols; - av1_get_block_dimensions(mbmi->sb_type, plane, xd, NULL, NULL, &rows, - &cols); - assert(*tok < tok_end); - pack_map_tokens(w, tok, palette_size_plane, rows * cols); - } - } - - BLOCK_SIZE bsize = mbmi->sb_type; - int is_inter_tx = is_inter_block(mbmi) || is_intrabc_block(mbmi); - int skip = mbmi->skip; - int segment_id = mbmi->segment_id; - if (cm->tx_mode == 
TX_MODE_SELECT && block_signals_txsize(bsize) && - !(is_inter_tx && skip) && !xd->lossless[segment_id]) { - if (is_inter_tx) { // This implies skip flag is 0. - const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, bsize, 0); - const int txbh = tx_size_high_unit[max_tx_size]; - const int txbw = tx_size_wide_unit[max_tx_size]; - const int width = block_size_wide[bsize] >> tx_size_wide_log2[0]; - const int height = block_size_high[bsize] >> tx_size_high_log2[0]; - int idx, idy; - for (idy = 0; idy < height; idy += txbh) - for (idx = 0; idx < width; idx += txbw) - write_tx_size_vartx(xd, mbmi, max_tx_size, 0, idy, idx, w); - } else { - write_selected_tx_size(xd, w); - set_txfm_ctxs(mbmi->tx_size, xd->n4_w, xd->n4_h, 0, xd); - } - } else { - set_txfm_ctxs(mbmi->tx_size, xd->n4_w, xd->n4_h, - skip && is_inter_block(mbmi), xd); - } - - write_tokens_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); -} - -static void write_partition(const AV1_COMMON *const cm, - const MACROBLOCKD *const xd, int hbs, int mi_row, - int mi_col, PARTITION_TYPE p, BLOCK_SIZE bsize, - aom_writer *w) { - const int is_partition_point = bsize >= BLOCK_8X8; - - if (!is_partition_point) return; - - const int has_rows = (mi_row + hbs) < cm->mi_rows; - const int has_cols = (mi_col + hbs) < cm->mi_cols; - const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize); - FRAME_CONTEXT *ec_ctx = xd->tile_ctx; - - if (!has_rows && !has_cols) { - assert(p == PARTITION_SPLIT); - return; - } - - if (has_rows && has_cols) { - aom_write_symbol(w, p, ec_ctx->partition_cdf[ctx], - partition_cdf_length(bsize)); - } else if (!has_rows && has_cols) { - assert(p == PARTITION_SPLIT || p == PARTITION_HORZ); - assert(bsize > BLOCK_8X8); - aom_cdf_prob cdf[2]; - partition_gather_vert_alike(cdf, ec_ctx->partition_cdf[ctx], bsize); - aom_write_cdf(w, p == PARTITION_SPLIT, cdf, 2); - } else { - assert(has_rows && !has_cols); - assert(p == PARTITION_SPLIT || p == PARTITION_VERT); - assert(bsize > BLOCK_8X8); - aom_cdf_prob 
cdf[2]; - partition_gather_horz_alike(cdf, ec_ctx->partition_cdf[ctx], bsize); - aom_write_cdf(w, p == PARTITION_SPLIT, cdf, 2); - } -} - -static void write_modes_sb(AV1_COMP *const cpi, const TileInfo *const tile, - aom_writer *const w, const TOKENEXTRA **tok, - const TOKENEXTRA *const tok_end, int mi_row, - int mi_col, BLOCK_SIZE bsize) { - const AV1_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; - const int hbs = mi_size_wide[bsize] / 2; - const int quarter_step = mi_size_wide[bsize] / 4; - int i; - const PARTITION_TYPE partition = get_partition(cm, mi_row, mi_col, bsize); - const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition); - - if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - - const int num_planes = av1_num_planes(cm); - for (int plane = 0; plane < num_planes; ++plane) { - int rcol0, rcol1, rrow0, rrow1; - if (av1_loop_restoration_corners_in_sb(cm, plane, mi_row, mi_col, bsize, - &rcol0, &rcol1, &rrow0, &rrow1)) { - const int rstride = cm->rst_info[plane].horz_units_per_tile; - for (int rrow = rrow0; rrow < rrow1; ++rrow) { - for (int rcol = rcol0; rcol < rcol1; ++rcol) { - const int runit_idx = rcol + rrow * rstride; - const RestorationUnitInfo *rui = - &cm->rst_info[plane].unit_info[runit_idx]; - loop_restoration_write_sb_coeffs(cm, xd, rui, w, plane, - cpi->td.counts); - } - } - } - } - - write_partition(cm, xd, hbs, mi_row, mi_col, partition, bsize, w); - switch (partition) { - case PARTITION_NONE: - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); - break; - case PARTITION_HORZ: - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); - if (mi_row + hbs < cm->mi_rows) - write_modes_b(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col); - break; - case PARTITION_VERT: - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); - if (mi_col + hbs < cm->mi_cols) - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col + hbs); - break; - case PARTITION_SPLIT: - write_modes_sb(cpi, 
tile, w, tok, tok_end, mi_row, mi_col, subsize); - write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col + hbs, subsize); - write_modes_sb(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col, subsize); - write_modes_sb(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col + hbs, - subsize); - break; - case PARTITION_HORZ_A: - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col + hbs); - write_modes_b(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col); - break; - case PARTITION_HORZ_B: - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); - write_modes_b(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col); - write_modes_b(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col + hbs); - break; - case PARTITION_VERT_A: - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); - write_modes_b(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col); - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col + hbs); - break; - case PARTITION_VERT_B: - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col); - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col + hbs); - write_modes_b(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col + hbs); - break; - case PARTITION_HORZ_4: - for (i = 0; i < 4; ++i) { - int this_mi_row = mi_row + i * quarter_step; - if (i > 0 && this_mi_row >= cm->mi_rows) break; - - write_modes_b(cpi, tile, w, tok, tok_end, this_mi_row, mi_col); - } - break; - case PARTITION_VERT_4: - for (i = 0; i < 4; ++i) { - int this_mi_col = mi_col + i * quarter_step; - if (i > 0 && this_mi_col >= cm->mi_cols) break; - - write_modes_b(cpi, tile, w, tok, tok_end, mi_row, this_mi_col); - } - break; - default: assert(0); - } - - // update partition context - update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize, partition); -} - -static void write_modes(AV1_COMP *const cpi, const TileInfo *const tile, - aom_writer *const w, int tile_row, int tile_col) { - AV1_COMMON *const cm = &cpi->common; - 
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; - const int mi_row_start = tile->mi_row_start; - const int mi_row_end = tile->mi_row_end; - const int mi_col_start = tile->mi_col_start; - const int mi_col_end = tile->mi_col_end; - int mi_row, mi_col, sb_row_in_tile; - - av1_zero_above_context(cm, xd, mi_col_start, mi_col_end, tile->tile_row); - av1_init_above_context(cm, xd, tile->tile_row); - - if (cpi->common.delta_q_present_flag) { - xd->current_qindex = cpi->common.base_qindex; - if (cpi->common.delta_lf_present_flag) { - av1_reset_loop_filter_delta(xd, av1_num_planes(cm)); - } - } - - for (mi_row = mi_row_start; mi_row < mi_row_end; - mi_row += cm->seq_params.mib_size) { - sb_row_in_tile = - (mi_row - tile->mi_row_start) >> cm->seq_params.mib_size_log2; - const TOKENEXTRA *tok = - cpi->tplist[tile_row][tile_col][sb_row_in_tile].start; - const TOKENEXTRA *tok_end = - tok + cpi->tplist[tile_row][tile_col][sb_row_in_tile].count; - - av1_zero_left_context(xd); - - for (mi_col = mi_col_start; mi_col < mi_col_end; - mi_col += cm->seq_params.mib_size) { - write_modes_sb(cpi, tile, w, &tok, tok_end, mi_row, mi_col, - cm->seq_params.sb_size); - } - assert(tok == cpi->tplist[tile_row][tile_col][sb_row_in_tile].stop); - } -} - -static void encode_restoration_mode(AV1_COMMON *cm, - struct aom_write_bit_buffer *wb) { - assert(!cm->all_lossless); - if (!cm->seq_params.enable_restoration) return; - if (cm->allow_intrabc) return; - const int num_planes = av1_num_planes(cm); - int all_none = 1, chroma_none = 1; - for (int p = 0; p < num_planes; ++p) { - RestorationInfo *rsi = &cm->rst_info[p]; - if (rsi->frame_restoration_type != RESTORE_NONE) { - all_none = 0; - chroma_none &= p == 0; - } - switch (rsi->frame_restoration_type) { - case RESTORE_NONE: - aom_wb_write_bit(wb, 0); - aom_wb_write_bit(wb, 0); - break; - case RESTORE_WIENER: - aom_wb_write_bit(wb, 1); - aom_wb_write_bit(wb, 0); - break; - case RESTORE_SGRPROJ: - aom_wb_write_bit(wb, 1); - aom_wb_write_bit(wb, 1); - break; 
- case RESTORE_SWITCHABLE: - aom_wb_write_bit(wb, 0); - aom_wb_write_bit(wb, 1); - break; - default: assert(0); - } - } - if (!all_none) { - assert(cm->seq_params.sb_size == BLOCK_64X64 || - cm->seq_params.sb_size == BLOCK_128X128); - const int sb_size = cm->seq_params.sb_size == BLOCK_128X128 ? 128 : 64; - - RestorationInfo *rsi = &cm->rst_info[0]; - - assert(rsi->restoration_unit_size >= sb_size); - assert(RESTORATION_UNITSIZE_MAX == 256); - - if (sb_size == 64) { - aom_wb_write_bit(wb, rsi->restoration_unit_size > 64); - } - if (rsi->restoration_unit_size > 64) { - aom_wb_write_bit(wb, rsi->restoration_unit_size > 128); - } - } - - if (num_planes > 1) { - int s = AOMMIN(cm->seq_params.subsampling_x, cm->seq_params.subsampling_y); - if (s && !chroma_none) { - aom_wb_write_bit(wb, cm->rst_info[1].restoration_unit_size != - cm->rst_info[0].restoration_unit_size); - assert(cm->rst_info[1].restoration_unit_size == - cm->rst_info[0].restoration_unit_size || - cm->rst_info[1].restoration_unit_size == - (cm->rst_info[0].restoration_unit_size >> s)); - assert(cm->rst_info[2].restoration_unit_size == - cm->rst_info[1].restoration_unit_size); - } else if (!s) { - assert(cm->rst_info[1].restoration_unit_size == - cm->rst_info[0].restoration_unit_size); - assert(cm->rst_info[2].restoration_unit_size == - cm->rst_info[1].restoration_unit_size); - } - } -} - -static void write_wiener_filter(int wiener_win, const WienerInfo *wiener_info, - WienerInfo *ref_wiener_info, aom_writer *wb) { - if (wiener_win == WIENER_WIN) - aom_write_primitive_refsubexpfin( - wb, WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1, - WIENER_FILT_TAP0_SUBEXP_K, - ref_wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV, - wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV); - else - assert(wiener_info->vfilter[0] == 0 && - wiener_info->vfilter[WIENER_WIN - 1] == 0); - aom_write_primitive_refsubexpfin( - wb, WIENER_FILT_TAP1_MAXV - WIENER_FILT_TAP1_MINV + 1, - WIENER_FILT_TAP1_SUBEXP_K, - 
ref_wiener_info->vfilter[1] - WIENER_FILT_TAP1_MINV, - wiener_info->vfilter[1] - WIENER_FILT_TAP1_MINV); - aom_write_primitive_refsubexpfin( - wb, WIENER_FILT_TAP2_MAXV - WIENER_FILT_TAP2_MINV + 1, - WIENER_FILT_TAP2_SUBEXP_K, - ref_wiener_info->vfilter[2] - WIENER_FILT_TAP2_MINV, - wiener_info->vfilter[2] - WIENER_FILT_TAP2_MINV); - if (wiener_win == WIENER_WIN) - aom_write_primitive_refsubexpfin( - wb, WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1, - WIENER_FILT_TAP0_SUBEXP_K, - ref_wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV, - wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV); - else - assert(wiener_info->hfilter[0] == 0 && - wiener_info->hfilter[WIENER_WIN - 1] == 0); - aom_write_primitive_refsubexpfin( - wb, WIENER_FILT_TAP1_MAXV - WIENER_FILT_TAP1_MINV + 1, - WIENER_FILT_TAP1_SUBEXP_K, - ref_wiener_info->hfilter[1] - WIENER_FILT_TAP1_MINV, - wiener_info->hfilter[1] - WIENER_FILT_TAP1_MINV); - aom_write_primitive_refsubexpfin( - wb, WIENER_FILT_TAP2_MAXV - WIENER_FILT_TAP2_MINV + 1, - WIENER_FILT_TAP2_SUBEXP_K, - ref_wiener_info->hfilter[2] - WIENER_FILT_TAP2_MINV, - wiener_info->hfilter[2] - WIENER_FILT_TAP2_MINV); - memcpy(ref_wiener_info, wiener_info, sizeof(*wiener_info)); -} - -static void write_sgrproj_filter(const SgrprojInfo *sgrproj_info, - SgrprojInfo *ref_sgrproj_info, - aom_writer *wb) { - aom_write_literal(wb, sgrproj_info->ep, SGRPROJ_PARAMS_BITS); - const sgr_params_type *params = &sgr_params[sgrproj_info->ep]; - - if (params->r[0] == 0) { - assert(sgrproj_info->xqd[0] == 0); - aom_write_primitive_refsubexpfin( - wb, SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K, - ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1, - sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1); - } else if (params->r[1] == 0) { - aom_write_primitive_refsubexpfin( - wb, SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K, - ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0, - sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0); - } else { - 
aom_write_primitive_refsubexpfin( - wb, SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K, - ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0, - sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0); - aom_write_primitive_refsubexpfin( - wb, SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K, - ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1, - sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1); - } - - memcpy(ref_sgrproj_info, sgrproj_info, sizeof(*sgrproj_info)); -} - -static void loop_restoration_write_sb_coeffs(const AV1_COMMON *const cm, - MACROBLOCKD *xd, - const RestorationUnitInfo *rui, - aom_writer *const w, int plane, - FRAME_COUNTS *counts) { - const RestorationInfo *rsi = cm->rst_info + plane; - RestorationType frame_rtype = rsi->frame_restoration_type; - if (frame_rtype == RESTORE_NONE) return; - - (void)counts; - assert(!cm->all_lossless); - - const int wiener_win = (plane > 0) ? WIENER_WIN_CHROMA : WIENER_WIN; - WienerInfo *wiener_info = xd->wiener_info + plane; - SgrprojInfo *sgrproj_info = xd->sgrproj_info + plane; - RestorationType unit_rtype = rui->restoration_type; - - if (frame_rtype == RESTORE_SWITCHABLE) { - aom_write_symbol(w, unit_rtype, xd->tile_ctx->switchable_restore_cdf, - RESTORE_SWITCHABLE_TYPES); -#if CONFIG_ENTROPY_STATS - ++counts->switchable_restore[unit_rtype]; -#endif - switch (unit_rtype) { - case RESTORE_WIENER: - write_wiener_filter(wiener_win, &rui->wiener_info, wiener_info, w); - break; - case RESTORE_SGRPROJ: - write_sgrproj_filter(&rui->sgrproj_info, sgrproj_info, w); - break; - default: assert(unit_rtype == RESTORE_NONE); break; - } - } else if (frame_rtype == RESTORE_WIENER) { - aom_write_symbol(w, unit_rtype != RESTORE_NONE, - xd->tile_ctx->wiener_restore_cdf, 2); -#if CONFIG_ENTROPY_STATS - ++counts->wiener_restore[unit_rtype != RESTORE_NONE]; -#endif - if (unit_rtype != RESTORE_NONE) { - write_wiener_filter(wiener_win, &rui->wiener_info, wiener_info, w); - } - } else if (frame_rtype == RESTORE_SGRPROJ) { - aom_write_symbol(w, 
unit_rtype != RESTORE_NONE, - xd->tile_ctx->sgrproj_restore_cdf, 2); -#if CONFIG_ENTROPY_STATS - ++counts->sgrproj_restore[unit_rtype != RESTORE_NONE]; -#endif - if (unit_rtype != RESTORE_NONE) { - write_sgrproj_filter(&rui->sgrproj_info, sgrproj_info, w); - } - } -} - -static void encode_loopfilter(AV1_COMMON *cm, struct aom_write_bit_buffer *wb) { - assert(!cm->coded_lossless); - if (cm->allow_intrabc) return; - const int num_planes = av1_num_planes(cm); - int i; - struct loopfilter *lf = &cm->lf; - - // Encode the loop filter level and type - aom_wb_write_literal(wb, lf->filter_level[0], 6); - aom_wb_write_literal(wb, lf->filter_level[1], 6); - if (num_planes > 1) { - if (lf->filter_level[0] || lf->filter_level[1]) { - aom_wb_write_literal(wb, lf->filter_level_u, 6); - aom_wb_write_literal(wb, lf->filter_level_v, 6); - } - } - aom_wb_write_literal(wb, lf->sharpness_level, 3); - - // Write out loop filter deltas applied at the MB level based on mode or - // ref frame (if they are enabled). - aom_wb_write_bit(wb, lf->mode_ref_delta_enabled); - - if (lf->mode_ref_delta_enabled) { - aom_wb_write_bit(wb, lf->mode_ref_delta_update); - - if (lf->mode_ref_delta_update) { - const int prime_idx = cm->primary_ref_frame; - const int buf_idx = - prime_idx == PRIMARY_REF_NONE ? 
-1 : cm->frame_refs[prime_idx].idx; - int8_t last_ref_deltas[REF_FRAMES]; - if (prime_idx == PRIMARY_REF_NONE || buf_idx < 0) { - av1_set_default_ref_deltas(last_ref_deltas); - } else { - memcpy(last_ref_deltas, cm->buffer_pool->frame_bufs[buf_idx].ref_deltas, - REF_FRAMES); - } - for (i = 0; i < REF_FRAMES; i++) { - const int delta = lf->ref_deltas[i]; - const int changed = delta != last_ref_deltas[i]; - aom_wb_write_bit(wb, changed); - if (changed) aom_wb_write_inv_signed_literal(wb, delta, 6); - } - - int8_t last_mode_deltas[MAX_MODE_LF_DELTAS]; - if (prime_idx == PRIMARY_REF_NONE || buf_idx < 0) { - av1_set_default_mode_deltas(last_mode_deltas); - } else { - memcpy(last_mode_deltas, - cm->buffer_pool->frame_bufs[buf_idx].mode_deltas, - MAX_MODE_LF_DELTAS); - } - for (i = 0; i < MAX_MODE_LF_DELTAS; i++) { - const int delta = lf->mode_deltas[i]; - const int changed = delta != last_mode_deltas[i]; - aom_wb_write_bit(wb, changed); - if (changed) aom_wb_write_inv_signed_literal(wb, delta, 6); - } - } - } -} - -static void encode_cdef(const AV1_COMMON *cm, struct aom_write_bit_buffer *wb) { - assert(!cm->coded_lossless); - if (!cm->seq_params.enable_cdef) return; - if (cm->allow_intrabc) return; - const int num_planes = av1_num_planes(cm); - int i; - aom_wb_write_literal(wb, cm->cdef_pri_damping - 3, 2); - assert(cm->cdef_pri_damping == cm->cdef_sec_damping); - aom_wb_write_literal(wb, cm->cdef_bits, 2); - for (i = 0; i < cm->nb_cdef_strengths; i++) { - aom_wb_write_literal(wb, cm->cdef_strengths[i], CDEF_STRENGTH_BITS); - if (num_planes > 1) - aom_wb_write_literal(wb, cm->cdef_uv_strengths[i], CDEF_STRENGTH_BITS); - } -} - -static void write_delta_q(struct aom_write_bit_buffer *wb, int delta_q) { - if (delta_q != 0) { - aom_wb_write_bit(wb, 1); - aom_wb_write_inv_signed_literal(wb, delta_q, 6); - } else { - aom_wb_write_bit(wb, 0); - } -} - -static void encode_quantization(const AV1_COMMON *const cm, - struct aom_write_bit_buffer *wb) { - const int num_planes = 
av1_num_planes(cm); - - aom_wb_write_literal(wb, cm->base_qindex, QINDEX_BITS); - write_delta_q(wb, cm->y_dc_delta_q); - if (num_planes > 1) { - int diff_uv_delta = (cm->u_dc_delta_q != cm->v_dc_delta_q) || - (cm->u_ac_delta_q != cm->v_ac_delta_q); - if (cm->seq_params.separate_uv_delta_q) aom_wb_write_bit(wb, diff_uv_delta); - write_delta_q(wb, cm->u_dc_delta_q); - write_delta_q(wb, cm->u_ac_delta_q); - if (diff_uv_delta) { - write_delta_q(wb, cm->v_dc_delta_q); - write_delta_q(wb, cm->v_ac_delta_q); - } - } - aom_wb_write_bit(wb, cm->using_qmatrix); - if (cm->using_qmatrix) { - aom_wb_write_literal(wb, cm->qm_y, QM_LEVEL_BITS); - aom_wb_write_literal(wb, cm->qm_u, QM_LEVEL_BITS); - if (!cm->seq_params.separate_uv_delta_q) - assert(cm->qm_u == cm->qm_v); - else - aom_wb_write_literal(wb, cm->qm_v, QM_LEVEL_BITS); - } -} - -static void encode_segmentation(AV1_COMMON *cm, MACROBLOCKD *xd, - struct aom_write_bit_buffer *wb) { - int i, j; - struct segmentation *seg = &cm->seg; - - aom_wb_write_bit(wb, seg->enabled); - if (!seg->enabled) return; - - // Write update flags - if (cm->primary_ref_frame == PRIMARY_REF_NONE) { - assert(seg->update_map == 1); - seg->temporal_update = 0; - assert(seg->update_data == 1); - } else { - aom_wb_write_bit(wb, seg->update_map); - if (seg->update_map) { - // Select the coding strategy (temporal or spatial) - av1_choose_segmap_coding_method(cm, xd); - aom_wb_write_bit(wb, seg->temporal_update); - } - aom_wb_write_bit(wb, seg->update_data); - } - - // Segmentation data - if (seg->update_data) { - for (i = 0; i < MAX_SEGMENTS; i++) { - for (j = 0; j < SEG_LVL_MAX; j++) { - const int active = segfeature_active(seg, i, j); - aom_wb_write_bit(wb, active); - if (active) { - const int data_max = av1_seg_feature_data_max(j); - const int data_min = -data_max; - const int ubits = get_unsigned_bits(data_max); - const int data = clamp(get_segdata(seg, i, j), data_min, data_max); - - if (av1_is_segfeature_signed(j)) { - 
aom_wb_write_inv_signed_literal(wb, data, ubits); - } else { - aom_wb_write_literal(wb, data, ubits); - } - } - } - } - } -} - -static void write_tx_mode(AV1_COMMON *cm, TX_MODE *mode, - struct aom_write_bit_buffer *wb) { - if (cm->coded_lossless) { - *mode = ONLY_4X4; - return; - } - aom_wb_write_bit(wb, *mode == TX_MODE_SELECT); -} - -static void write_frame_interp_filter(InterpFilter filter, - struct aom_write_bit_buffer *wb) { - aom_wb_write_bit(wb, filter == SWITCHABLE); - if (filter != SWITCHABLE) - aom_wb_write_literal(wb, filter, LOG_SWITCHABLE_FILTERS); -} - -static void fix_interp_filter(AV1_COMMON *cm, FRAME_COUNTS *counts) { - if (cm->interp_filter == SWITCHABLE) { - // Check to see if only one of the filters is actually used - int count[SWITCHABLE_FILTERS]; - int i, j, c = 0; - for (i = 0; i < SWITCHABLE_FILTERS; ++i) { - count[i] = 0; - for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j) - count[i] += counts->switchable_interp[j][i]; - c += (count[i] > 0); - } - if (c == 1) { - // Only one filter is used. 
So set the filter at frame level - for (i = 0; i < SWITCHABLE_FILTERS; ++i) { - if (count[i]) { - if (i == EIGHTTAP_REGULAR) cm->interp_filter = i; - break; - } - } - } - } -} - -// Same function as write_uniform but writing to uncompresses header wb -static void wb_write_uniform(struct aom_write_bit_buffer *wb, int n, int v) { - const int l = get_unsigned_bits(n); - const int m = (1 << l) - n; - if (l == 0) return; - if (v < m) { - aom_wb_write_literal(wb, v, l - 1); - } else { - aom_wb_write_literal(wb, m + ((v - m) >> 1), l - 1); - aom_wb_write_literal(wb, (v - m) & 1, 1); - } -} - -static void write_tile_info_max_tile(const AV1_COMMON *const cm, - struct aom_write_bit_buffer *wb) { - int width_mi = ALIGN_POWER_OF_TWO(cm->mi_cols, cm->seq_params.mib_size_log2); - int height_mi = ALIGN_POWER_OF_TWO(cm->mi_rows, cm->seq_params.mib_size_log2); - int width_sb = width_mi >> cm->seq_params.mib_size_log2; - int height_sb = height_mi >> cm->seq_params.mib_size_log2; - int size_sb, i; - - aom_wb_write_bit(wb, cm->uniform_tile_spacing_flag); - - if (cm->uniform_tile_spacing_flag) { - // Uniform spaced tiles with power-of-two number of rows and columns - // tile columns - int ones = cm->log2_tile_cols - cm->min_log2_tile_cols; - while (ones--) { - aom_wb_write_bit(wb, 1); - } - if (cm->log2_tile_cols < cm->max_log2_tile_cols) { - aom_wb_write_bit(wb, 0); - } - - // rows - ones = cm->log2_tile_rows - cm->min_log2_tile_rows; - while (ones--) { - aom_wb_write_bit(wb, 1); - } - if (cm->log2_tile_rows < cm->max_log2_tile_rows) { - aom_wb_write_bit(wb, 0); - } - } else { - // Explicit tiles with configurable tile widths and heights - // columns - for (i = 0; i < cm->tile_cols; i++) { - size_sb = cm->tile_col_start_sb[i + 1] - cm->tile_col_start_sb[i]; - wb_write_uniform(wb, AOMMIN(width_sb, cm->max_tile_width_sb), - size_sb - 1); - width_sb -= size_sb; - } - assert(width_sb == 0); - - // rows - for (i = 0; i < cm->tile_rows; i++) { - size_sb = cm->tile_row_start_sb[i + 1] - 
cm->tile_row_start_sb[i]; - wb_write_uniform(wb, AOMMIN(height_sb, cm->max_tile_height_sb), - size_sb - 1); - height_sb -= size_sb; - } - assert(height_sb == 0); - } -} - -static void write_tile_info(const AV1_COMMON *const cm, - struct aom_write_bit_buffer *saved_wb, - struct aom_write_bit_buffer *wb) { - write_tile_info_max_tile(cm, wb); - - *saved_wb = *wb; - if (cm->tile_rows * cm->tile_cols > 1) { - // tile id used for cdf update - aom_wb_write_literal(wb, 0, cm->log2_tile_cols + cm->log2_tile_rows); - // Number of bytes in tile size - 1 - aom_wb_write_literal(wb, 3, 2); - } -} - -static void write_ext_tile_info(const AV1_COMMON *const cm, - struct aom_write_bit_buffer *saved_wb, - struct aom_write_bit_buffer *wb) { - // This information is stored as a separate byte. - int mod = wb->bit_offset % CHAR_BIT; - if (mod > 0) aom_wb_write_literal(wb, 0, CHAR_BIT - mod); - assert(aom_wb_is_byte_aligned(wb)); - - *saved_wb = *wb; - if (cm->tile_rows * cm->tile_cols > 1) { - // Note that the last item in the uncompressed header is the data - // describing tile configuration. - // Number of bytes in tile column size - 1 - aom_wb_write_literal(wb, 0, 2); - // Number of bytes in tile size - 1 - aom_wb_write_literal(wb, 0, 2); - } -} - -static int get_refresh_mask(AV1_COMP *cpi) { - if ((cpi->common.frame_type == KEY_FRAME && cpi->common.show_frame) || - frame_is_sframe(&cpi->common)) - return 0xFF; - - int refresh_mask = 0; - - // NOTE(zoeliu): When LAST_FRAME is to get refreshed, the decoder will be - // notified to get LAST3_FRAME refreshed and then the virtual indexes for all - // the 3 LAST reference frames will be updated accordingly, i.e.: - // (1) The original virtual index for LAST3_FRAME will become the new virtual - // index for LAST_FRAME; and - // (2) The original virtual indexes for LAST_FRAME and LAST2_FRAME will be - // shifted and become the new virtual indexes for LAST2_FRAME and - // LAST3_FRAME. 
- refresh_mask |= - (cpi->refresh_last_frame << cpi->ref_fb_idx[LAST_REF_FRAMES - 1]); -#if USE_SYMM_MULTI_LAYER - refresh_mask |= - (cpi->new_bwdref_update_rule == 1) - ? (cpi->refresh_bwd_ref_frame << cpi->ref_fb_idx[EXTREF_FRAME - 1]) - : (cpi->refresh_bwd_ref_frame << cpi->ref_fb_idx[BWDREF_FRAME - 1]); -#else - refresh_mask |= - (cpi->refresh_bwd_ref_frame << cpi->ref_fb_idx[BWDREF_FRAME - 1]); -#endif - refresh_mask |= - (cpi->refresh_alt2_ref_frame << cpi->ref_fb_idx[ALTREF2_FRAME - 1]); - - if (av1_preserve_existing_gf(cpi)) { - // We have decided to preserve the previously existing golden frame as our - // new ARF frame. However, in the short term we leave it in the GF slot and, - // if we're updating the GF with the current decoded frame, we save it - // instead to the ARF slot. - // Later, in the function av1_encoder.c:av1_update_reference_frames() we - // will swap gld_fb_idx and alt_fb_idx to achieve our objective. We do it - // there so that it can be done outside of the recode loop. - // Note: This is highly specific to the use of ARF as a forward reference, - // and this needs to be generalized as other uses are implemented - // (like RTC/temporal scalability). 
- - if (cpi->preserve_arf_as_gld) { - return refresh_mask; - } else { - return refresh_mask | - (cpi->refresh_golden_frame << cpi->ref_fb_idx[ALTREF_FRAME - 1]); - } - } else { - const int arf_idx = cpi->ref_fb_idx[ALTREF_FRAME - 1]; - return refresh_mask | - (cpi->refresh_golden_frame << cpi->ref_fb_idx[GOLDEN_FRAME - 1]) | - (cpi->refresh_alt_ref_frame << arf_idx); - } -} - -static INLINE int find_identical_tile( - const int tile_row, const int tile_col, - TileBufferEnc (*const tile_buffers)[MAX_TILE_COLS]) { - const MV32 candidate_offset[1] = { { 1, 0 } }; - const uint8_t *const cur_tile_data = - tile_buffers[tile_row][tile_col].data + 4; - const size_t cur_tile_size = tile_buffers[tile_row][tile_col].size; - - int i; - - if (tile_row == 0) return 0; - - // (TODO: yunqingwang) For now, only above tile is checked and used. - // More candidates such as left tile can be added later. - for (i = 0; i < 1; i++) { - int row_offset = candidate_offset[0].row; - int col_offset = candidate_offset[0].col; - int row = tile_row - row_offset; - int col = tile_col - col_offset; - uint8_t tile_hdr; - const uint8_t *tile_data; - TileBufferEnc *candidate; - - if (row < 0 || col < 0) continue; - - tile_hdr = *(tile_buffers[row][col].data); - - // Read out tcm bit - if ((tile_hdr >> 7) == 1) { - // The candidate is a copy tile itself - row_offset += tile_hdr & 0x7f; - row = tile_row - row_offset; - } - - candidate = &tile_buffers[row][col]; - - if (row_offset >= 128 || candidate->size != cur_tile_size) continue; - - tile_data = candidate->data + 4; - - if (memcmp(tile_data, cur_tile_data, cur_tile_size) != 0) continue; - - // Identical tile found - assert(row_offset > 0); - return row_offset; - } - - // No identical tile found - return 0; -} - -static void write_render_size(const AV1_COMMON *cm, - struct aom_write_bit_buffer *wb) { - const int scaling_active = av1_resize_scaled(cm); - aom_wb_write_bit(wb, scaling_active); - if (scaling_active) { - aom_wb_write_literal(wb, 
cm->render_width - 1, 16); - aom_wb_write_literal(wb, cm->render_height - 1, 16); - } -} - -static void write_superres_scale(const AV1_COMMON *const cm, - struct aom_write_bit_buffer *wb) { - const SequenceHeader *const seq_params = &cm->seq_params; - if (!seq_params->enable_superres) { - assert(cm->superres_scale_denominator == SCALE_NUMERATOR); - return; - } - - // First bit is whether to to scale or not - if (cm->superres_scale_denominator == SCALE_NUMERATOR) { - aom_wb_write_bit(wb, 0); // no scaling - } else { - aom_wb_write_bit(wb, 1); // scaling, write scale factor - assert(cm->superres_scale_denominator >= SUPERRES_SCALE_DENOMINATOR_MIN); - assert(cm->superres_scale_denominator < - SUPERRES_SCALE_DENOMINATOR_MIN + (1 << SUPERRES_SCALE_BITS)); - aom_wb_write_literal( - wb, cm->superres_scale_denominator - SUPERRES_SCALE_DENOMINATOR_MIN, - SUPERRES_SCALE_BITS); - } -} - -static void write_frame_size(const AV1_COMMON *cm, int frame_size_override, - struct aom_write_bit_buffer *wb) { - const int coded_width = cm->superres_upscaled_width - 1; - const int coded_height = cm->superres_upscaled_height - 1; - - if (frame_size_override) { - const SequenceHeader *seq_params = &cm->seq_params; - int num_bits_width = seq_params->num_bits_width; - int num_bits_height = seq_params->num_bits_height; - aom_wb_write_literal(wb, coded_width, num_bits_width); - aom_wb_write_literal(wb, coded_height, num_bits_height); - } - - write_superres_scale(cm, wb); - write_render_size(cm, wb); -} - -static void write_frame_size_with_refs(AV1_COMP *cpi, - struct aom_write_bit_buffer *wb) { - AV1_COMMON *const cm = &cpi->common; - int found = 0; - - MV_REFERENCE_FRAME ref_frame; - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { - YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, ref_frame); - - if (cfg != NULL) { - found = cm->superres_upscaled_width == cfg->y_crop_width && - cm->superres_upscaled_height == cfg->y_crop_height; - found &= cm->render_width == 
cfg->render_width && - cm->render_height == cfg->render_height; - } - aom_wb_write_bit(wb, found); - if (found) { - write_superres_scale(cm, wb); - break; - } - } - - if (!found) { - int frame_size_override = 1; // Always equal to 1 in this function - write_frame_size(cm, frame_size_override, wb); - } -} - -static void write_profile(BITSTREAM_PROFILE profile, - struct aom_write_bit_buffer *wb) { - assert(profile >= PROFILE_0 && profile < MAX_PROFILES); - aom_wb_write_literal(wb, profile, PROFILE_BITS); -} - -static void write_bitdepth(const SequenceHeader *const seq_params, - struct aom_write_bit_buffer *wb) { - // Profile 0/1: [0] for 8 bit, [1] 10-bit - // Profile 2: [0] for 8 bit, [10] 10-bit, [11] - 12-bit - aom_wb_write_bit(wb, seq_params->bit_depth == AOM_BITS_8 ? 0 : 1); - if (seq_params->profile == PROFILE_2 && seq_params->bit_depth != AOM_BITS_8) { - aom_wb_write_bit(wb, seq_params->bit_depth == AOM_BITS_10 ? 0 : 1); - } -} - -static void write_color_config(const SequenceHeader *const seq_params, - struct aom_write_bit_buffer *wb) { - write_bitdepth(seq_params, wb); - const int is_monochrome = seq_params->monochrome; - // monochrome bit - if (seq_params->profile != PROFILE_1) - aom_wb_write_bit(wb, is_monochrome); - else - assert(!is_monochrome); - if (seq_params->color_primaries == AOM_CICP_CP_UNSPECIFIED && - seq_params->transfer_characteristics == AOM_CICP_TC_UNSPECIFIED && - seq_params->matrix_coefficients == AOM_CICP_MC_UNSPECIFIED) { - aom_wb_write_bit(wb, 0); // No color description present - } else { - aom_wb_write_bit(wb, 1); // Color description present - aom_wb_write_literal(wb, seq_params->color_primaries, 8); - aom_wb_write_literal(wb, seq_params->transfer_characteristics, 8); - aom_wb_write_literal(wb, seq_params->matrix_coefficients, 8); - } - if (is_monochrome) { - // 0: [16, 235] (i.e. 
xvYCC), 1: [0, 255] - aom_wb_write_bit(wb, seq_params->color_range); - return; - } - if (seq_params->color_primaries == AOM_CICP_CP_BT_709 && - seq_params->transfer_characteristics == AOM_CICP_TC_SRGB && - seq_params->matrix_coefficients == - AOM_CICP_MC_IDENTITY) { // it would be better to remove this - // dependency too - assert(seq_params->subsampling_x == 0 && seq_params->subsampling_y == 0); - assert(seq_params->profile == PROFILE_1 || - (seq_params->profile == PROFILE_2 && - seq_params->bit_depth == AOM_BITS_12)); - } else { - // 0: [16, 235] (i.e. xvYCC), 1: [0, 255] - aom_wb_write_bit(wb, seq_params->color_range); - if (seq_params->profile == PROFILE_0) { - // 420 only - assert(seq_params->subsampling_x == 1 && seq_params->subsampling_y == 1); - } else if (seq_params->profile == PROFILE_1) { - // 444 only - assert(seq_params->subsampling_x == 0 && seq_params->subsampling_y == 0); - } else if (seq_params->profile == PROFILE_2) { - if (seq_params->bit_depth == AOM_BITS_12) { - // 420, 444 or 422 - aom_wb_write_bit(wb, seq_params->subsampling_x); - if (seq_params->subsampling_x == 0) { - assert(seq_params->subsampling_y == 0 && - "4:4:0 subsampling not allowed in AV1"); - } else { - aom_wb_write_bit(wb, seq_params->subsampling_y); - } - } else { - // 422 only - assert(seq_params->subsampling_x == 1 && - seq_params->subsampling_y == 0); - } - } - if (seq_params->matrix_coefficients == AOM_CICP_MC_IDENTITY) { - assert(seq_params->subsampling_x == 0 && seq_params->subsampling_y == 0); - } - if (seq_params->subsampling_x == 1 && seq_params->subsampling_y == 1) { - aom_wb_write_literal(wb, seq_params->chroma_sample_position, 2); - } - } - aom_wb_write_bit(wb, seq_params->separate_uv_delta_q); -} - -static void write_timing_info_header(AV1_COMMON *const cm, - struct aom_write_bit_buffer *wb) { - aom_wb_write_unsigned_literal(wb, cm->timing_info.num_units_in_display_tick, - 32); // Number of units in tick - aom_wb_write_unsigned_literal(wb, 
cm->timing_info.time_scale, - 32); // Time scale - aom_wb_write_bit( - wb, - cm->timing_info.equal_picture_interval); // Equal picture interval bit - if (cm->timing_info.equal_picture_interval) { - aom_wb_write_uvlc( - wb, - cm->timing_info.num_ticks_per_picture - 1); // ticks per picture - } -} - -static void write_decoder_model_info(AV1_COMMON *const cm, - struct aom_write_bit_buffer *wb) { - aom_wb_write_literal( - wb, cm->buffer_model.encoder_decoder_buffer_delay_length - 1, 5); - aom_wb_write_unsigned_literal(wb, cm->buffer_model.num_units_in_decoding_tick, - 32); // Number of units in decoding tick - aom_wb_write_literal(wb, cm->buffer_model.buffer_removal_time_length - 1, 5); - aom_wb_write_literal(wb, cm->buffer_model.frame_presentation_time_length - 1, - 5); -} - -static void write_dec_model_op_parameters(AV1_COMMON *const cm, - struct aom_write_bit_buffer *wb, - int op_num) { - if (op_num > MAX_NUM_OPERATING_POINTS) - aom_internal_error( - &cm->error, AOM_CODEC_UNSUP_BITSTREAM, - "Encoder does not support %d decoder model operating points", op_num); - - // aom_wb_write_bit(wb, cm->op_params[op_num].has_parameters); - // if (!cm->op_params[op_num].has_parameters) return; - - aom_wb_write_unsigned_literal( - wb, cm->op_params[op_num].decoder_buffer_delay, - cm->buffer_model.encoder_decoder_buffer_delay_length); - - aom_wb_write_unsigned_literal( - wb, cm->op_params[op_num].encoder_buffer_delay, - cm->buffer_model.encoder_decoder_buffer_delay_length); - - aom_wb_write_bit(wb, cm->op_params[op_num].low_delay_mode_flag); - - cm->op_frame_timing[op_num].buffer_removal_time = - 0; // reset the decoded frame counter -} - -static void write_tu_pts_info(AV1_COMMON *const cm, - struct aom_write_bit_buffer *wb) { - aom_wb_write_unsigned_literal( - wb, cm->frame_presentation_time, - cm->buffer_model.frame_presentation_time_length); -} - -static void write_film_grain_params(AV1_COMP *cpi, - struct aom_write_bit_buffer *wb) { - AV1_COMMON *const cm = &cpi->common; - 
aom_film_grain_t *pars = &cm->film_grain_params; - - cm->cur_frame->film_grain_params = *pars; - - aom_wb_write_bit(wb, pars->apply_grain); - if (!pars->apply_grain) return; - - aom_wb_write_literal(wb, pars->random_seed, 16); - - pars->random_seed += 3381; // Changing random seed for film grain - if (!pars->random_seed) // Random seed should not be zero - pars->random_seed += 7391; - if (cm->frame_type == INTER_FRAME) - aom_wb_write_bit(wb, pars->update_parameters); - else - pars->update_parameters = 1; - if (!pars->update_parameters) { - RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; - int ref_frame, ref_idx, buf_idx; - for (ref_frame = LAST_FRAME; ref_frame < REF_FRAMES; ref_frame++) { - ref_idx = get_ref_frame_map_idx(cpi, ref_frame); - assert(ref_idx != INVALID_IDX); - buf_idx = cm->ref_frame_map[ref_idx]; - if (frame_bufs[buf_idx].film_grain_params_present && - memcmp(pars, &frame_bufs[buf_idx].film_grain_params, sizeof(*pars))) { - break; - } - } - assert(ref_frame < REF_FRAMES); - aom_wb_write_literal(wb, ref_idx, 3); - return; - } - - // Scaling functions parameters - aom_wb_write_literal(wb, pars->num_y_points, 4); // max 14 - for (int i = 0; i < pars->num_y_points; i++) { - aom_wb_write_literal(wb, pars->scaling_points_y[i][0], 8); - aom_wb_write_literal(wb, pars->scaling_points_y[i][1], 8); - } - - if (!cm->seq_params.monochrome) - aom_wb_write_bit(wb, pars->chroma_scaling_from_luma); - else - pars->chroma_scaling_from_luma = 0; // for monochrome override to 0 - - if (cm->seq_params.monochrome || pars->chroma_scaling_from_luma || - ((cm->seq_params.subsampling_x == 1) && - (cm->seq_params.subsampling_y == 1) && (pars->num_y_points == 0))) { - pars->num_cb_points = 0; - pars->num_cr_points = 0; - } else { - aom_wb_write_literal(wb, pars->num_cb_points, 4); // max 10 - for (int i = 0; i < pars->num_cb_points; i++) { - aom_wb_write_literal(wb, pars->scaling_points_cb[i][0], 8); - aom_wb_write_literal(wb, pars->scaling_points_cb[i][1], 8); - 
} - - aom_wb_write_literal(wb, pars->num_cr_points, 4); // max 10 - for (int i = 0; i < pars->num_cr_points; i++) { - aom_wb_write_literal(wb, pars->scaling_points_cr[i][0], 8); - aom_wb_write_literal(wb, pars->scaling_points_cr[i][1], 8); - } - } - - aom_wb_write_literal(wb, pars->scaling_shift - 8, 2); // 8 + value - - // AR coefficients - // Only sent if the corresponsing scaling function has - // more than 0 points - - aom_wb_write_literal(wb, pars->ar_coeff_lag, 2); - - int num_pos_luma = 2 * pars->ar_coeff_lag * (pars->ar_coeff_lag + 1); - int num_pos_chroma = num_pos_luma; - if (pars->num_y_points > 0) ++num_pos_chroma; - - if (pars->num_y_points) - for (int i = 0; i < num_pos_luma; i++) - aom_wb_write_literal(wb, pars->ar_coeffs_y[i] + 128, 8); - - if (pars->num_cb_points || pars->chroma_scaling_from_luma) - for (int i = 0; i < num_pos_chroma; i++) - aom_wb_write_literal(wb, pars->ar_coeffs_cb[i] + 128, 8); - - if (pars->num_cr_points || pars->chroma_scaling_from_luma) - for (int i = 0; i < num_pos_chroma; i++) - aom_wb_write_literal(wb, pars->ar_coeffs_cr[i] + 128, 8); - - aom_wb_write_literal(wb, pars->ar_coeff_shift - 6, 2); // 8 + value - - aom_wb_write_literal(wb, pars->grain_scale_shift, 2); - - if (pars->num_cb_points) { - aom_wb_write_literal(wb, pars->cb_mult, 8); - aom_wb_write_literal(wb, pars->cb_luma_mult, 8); - aom_wb_write_literal(wb, pars->cb_offset, 9); - } - - if (pars->num_cr_points) { - aom_wb_write_literal(wb, pars->cr_mult, 8); - aom_wb_write_literal(wb, pars->cr_luma_mult, 8); - aom_wb_write_literal(wb, pars->cr_offset, 9); - } - - aom_wb_write_bit(wb, pars->overlap_flag); - - aom_wb_write_bit(wb, pars->clip_to_restricted_range); -} - -static void write_sb_size(SequenceHeader *seq_params, - struct aom_write_bit_buffer *wb) { - (void)seq_params; - (void)wb; - assert(seq_params->mib_size == mi_size_wide[seq_params->sb_size]); - assert(seq_params->mib_size == 1 << seq_params->mib_size_log2); - assert(seq_params->sb_size == BLOCK_128X128 
|| - seq_params->sb_size == BLOCK_64X64); - aom_wb_write_bit(wb, seq_params->sb_size == BLOCK_128X128 ? 1 : 0); -} - -static void write_sequence_header(AV1_COMP *cpi, - struct aom_write_bit_buffer *wb) { - AV1_COMMON *const cm = &cpi->common; - SequenceHeader *seq_params = &cm->seq_params; - - int max_frame_width = cpi->oxcf.forced_max_frame_width - ? cpi->oxcf.forced_max_frame_width - : cpi->oxcf.width; - int max_frame_height = cpi->oxcf.forced_max_frame_height - ? cpi->oxcf.forced_max_frame_height - : cpi->oxcf.height; - // max((int)ceil(log2(max_frame_width)), 1) - const int num_bits_width = - (max_frame_width > 1) ? get_msb(max_frame_width - 1) + 1 : 1; - // max((int)ceil(log2(max_frame_height)), 1) - const int num_bits_height = - (max_frame_height > 1) ? get_msb(max_frame_height - 1) + 1 : 1; - assert(num_bits_width <= 16); - assert(num_bits_height <= 16); - - seq_params->num_bits_width = num_bits_width; - seq_params->num_bits_height = num_bits_height; - seq_params->max_frame_width = max_frame_width; - seq_params->max_frame_height = max_frame_height; - - aom_wb_write_literal(wb, num_bits_width - 1, 4); - aom_wb_write_literal(wb, num_bits_height - 1, 4); - aom_wb_write_literal(wb, max_frame_width - 1, num_bits_width); - aom_wb_write_literal(wb, max_frame_height - 1, num_bits_height); - - /* Placeholder for actually writing to the bitstream */ - if (!seq_params->reduced_still_picture_hdr) { - seq_params->frame_id_numbers_present_flag = - cm->large_scale_tile ? 0 : cm->error_resilient_mode; - seq_params->frame_id_length = FRAME_ID_LENGTH; - seq_params->delta_frame_id_length = DELTA_FRAME_ID_LENGTH; - - aom_wb_write_bit(wb, seq_params->frame_id_numbers_present_flag); - if (seq_params->frame_id_numbers_present_flag) { - // We must always have delta_frame_id_length < frame_id_length, - // in order for a frame to be referenced with a unique delta. - // Avoid wasting bits by using a coding that enforces this restriction. 
- aom_wb_write_literal(wb, seq_params->delta_frame_id_length - 2, 4); - aom_wb_write_literal( - wb, - seq_params->frame_id_length - seq_params->delta_frame_id_length - 1, - 3); - } - } - - write_sb_size(seq_params, wb); - - aom_wb_write_bit(wb, seq_params->enable_filter_intra); - aom_wb_write_bit(wb, seq_params->enable_intra_edge_filter); - - if (!seq_params->reduced_still_picture_hdr) { - aom_wb_write_bit(wb, seq_params->enable_interintra_compound); - aom_wb_write_bit(wb, seq_params->enable_masked_compound); - aom_wb_write_bit(wb, seq_params->enable_warped_motion); - aom_wb_write_bit(wb, seq_params->enable_dual_filter); - - aom_wb_write_bit(wb, seq_params->enable_order_hint); - - if (seq_params->enable_order_hint) { - aom_wb_write_bit(wb, seq_params->enable_jnt_comp); - aom_wb_write_bit(wb, seq_params->enable_ref_frame_mvs); - } - if (seq_params->force_screen_content_tools == 2) { - aom_wb_write_bit(wb, 1); - } else { - aom_wb_write_bit(wb, 0); - aom_wb_write_bit(wb, seq_params->force_screen_content_tools); - } - if (seq_params->force_screen_content_tools > 0) { - if (seq_params->force_integer_mv == 2) { - aom_wb_write_bit(wb, 1); - } else { - aom_wb_write_bit(wb, 0); - aom_wb_write_bit(wb, seq_params->force_integer_mv); - } - } else { - assert(seq_params->force_integer_mv == 2); - } - if (seq_params->enable_order_hint) - aom_wb_write_literal(wb, seq_params->order_hint_bits_minus_1, 3); - } - - aom_wb_write_bit(wb, seq_params->enable_superres); - aom_wb_write_bit(wb, seq_params->enable_cdef); - aom_wb_write_bit(wb, seq_params->enable_restoration); -} - -static void write_global_motion_params(const WarpedMotionParams *params, - const WarpedMotionParams *ref_params, - struct aom_write_bit_buffer *wb, - int allow_hp) { - const TransformationType type = params->wmtype; - - aom_wb_write_bit(wb, type != IDENTITY); - if (type != IDENTITY) { - aom_wb_write_bit(wb, type == ROTZOOM); - if (type != ROTZOOM) aom_wb_write_bit(wb, type == TRANSLATION); - } - - if (type >= 
ROTZOOM) { - aom_wb_write_signed_primitive_refsubexpfin( - wb, GM_ALPHA_MAX + 1, SUBEXPFIN_K, - (ref_params->wmmat[2] >> GM_ALPHA_PREC_DIFF) - - (1 << GM_ALPHA_PREC_BITS), - (params->wmmat[2] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS)); - aom_wb_write_signed_primitive_refsubexpfin( - wb, GM_ALPHA_MAX + 1, SUBEXPFIN_K, - (ref_params->wmmat[3] >> GM_ALPHA_PREC_DIFF), - (params->wmmat[3] >> GM_ALPHA_PREC_DIFF)); - } - - if (type >= AFFINE) { - aom_wb_write_signed_primitive_refsubexpfin( - wb, GM_ALPHA_MAX + 1, SUBEXPFIN_K, - (ref_params->wmmat[4] >> GM_ALPHA_PREC_DIFF), - (params->wmmat[4] >> GM_ALPHA_PREC_DIFF)); - aom_wb_write_signed_primitive_refsubexpfin( - wb, GM_ALPHA_MAX + 1, SUBEXPFIN_K, - (ref_params->wmmat[5] >> GM_ALPHA_PREC_DIFF) - - (1 << GM_ALPHA_PREC_BITS), - (params->wmmat[5] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS)); - } - - if (type >= TRANSLATION) { - const int trans_bits = (type == TRANSLATION) - ? GM_ABS_TRANS_ONLY_BITS - !allow_hp - : GM_ABS_TRANS_BITS; - const int trans_prec_diff = (type == TRANSLATION) - ? GM_TRANS_ONLY_PREC_DIFF + !allow_hp - : GM_TRANS_PREC_DIFF; - aom_wb_write_signed_primitive_refsubexpfin( - wb, (1 << trans_bits) + 1, SUBEXPFIN_K, - (ref_params->wmmat[0] >> trans_prec_diff), - (params->wmmat[0] >> trans_prec_diff)); - aom_wb_write_signed_primitive_refsubexpfin( - wb, (1 << trans_bits) + 1, SUBEXPFIN_K, - (ref_params->wmmat[1] >> trans_prec_diff), - (params->wmmat[1] >> trans_prec_diff)); - } -} - -static void write_global_motion(AV1_COMP *cpi, - struct aom_write_bit_buffer *wb) { - AV1_COMMON *const cm = &cpi->common; - int frame; - for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) { - const WarpedMotionParams *ref_params = - cm->prev_frame ? 
&cm->prev_frame->global_motion[frame] - : &default_warp_params; - write_global_motion_params(&cm->global_motion[frame], ref_params, wb, - cm->allow_high_precision_mv); - // TODO(sarahparker, debargha): The logic in the commented out code below - // does not work currently and causes mismatches when resize is on. - // Fix it before turning the optimization back on. - /* - YV12_BUFFER_CONFIG *ref_buf = get_ref_frame_buffer(cpi, frame); - if (cpi->source->y_crop_width == ref_buf->y_crop_width && - cpi->source->y_crop_height == ref_buf->y_crop_height) { - write_global_motion_params(&cm->global_motion[frame], - &cm->prev_frame->global_motion[frame], wb, - cm->allow_high_precision_mv); - } else { - assert(cm->global_motion[frame].wmtype == IDENTITY && - "Invalid warp type for frames of different resolutions"); - } - */ - /* - printf("Frame %d/%d: Enc Ref %d: %d %d %d %d\n", - cm->current_video_frame, cm->show_frame, frame, - cm->global_motion[frame].wmmat[0], - cm->global_motion[frame].wmmat[1], cm->global_motion[frame].wmmat[2], - cm->global_motion[frame].wmmat[3]); - */ - } -} - -static void check_frame_refs_short_signaling(AV1_COMP *const cpi) { - AV1_COMMON *const cm = &cpi->common; - if (!cm->frame_refs_short_signaling) return; - - // Check whether all references are distinct frames. - int buf_markers[FRAME_BUFFERS] = { 0 }; - for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { - const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame); - if (buf_idx != INVALID_IDX) { - assert(buf_idx >= 0 && buf_idx < FRAME_BUFFERS); - buf_markers[buf_idx] = 1; - } - } - - int num_refs = 0; - for (int buf_idx = 0; buf_idx < FRAME_BUFFERS; ++buf_idx) { - num_refs += buf_markers[buf_idx]; - } - - // We only turn on frame_refs_short_signaling when all references are - // distinct. - if (num_refs < INTER_REFS_PER_FRAME) { - // It indicates that there exist more than one reference frame pointing to - // the same reference buffer, i.e. 
two or more references are duplicate. - cm->frame_refs_short_signaling = 0; - return; - } - - // Check whether the encoder side ref frame choices are aligned with that to - // be derived at the decoder side. - RefBuffer frame_refs_copy[INTER_REFS_PER_FRAME]; - - // Backup the frame refs info - memcpy(frame_refs_copy, cm->frame_refs, - INTER_REFS_PER_FRAME * sizeof(RefBuffer)); - - const int lst_map_idx = get_ref_frame_map_idx(cpi, LAST_FRAME); - const int gld_map_idx = get_ref_frame_map_idx(cpi, GOLDEN_FRAME); - - // Set up the frame refs mapping indexes according to the - // frame_refs_short_signaling policy. - av1_set_frame_refs(cm, lst_map_idx, gld_map_idx); - - // We only turn on frame_refs_short_signaling when the encoder side decision - // on ref frames is identical to that at the decoder side. - for (int ref_idx = 0; ref_idx < INTER_REFS_PER_FRAME; ++ref_idx) { - // Compare the buffer index between two reference frames indexed - // respectively by the encoder and the decoder side decisions. - if (cm->frame_refs[ref_idx].idx != frame_refs_copy[ref_idx].idx) { - cm->frame_refs_short_signaling = 0; - break; - } - } - -#if 0 // For debug - printf("\nFrame=%d: \n", cm->current_video_frame); - printf("***frame_refs_short_signaling=%d\n", cm->frame_refs_short_signaling); - for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { - printf("enc_ref(map_idx=%d, buf_idx=%d)=%d, vs. " - "dec_ref(map_idx=%d, buf_idx=%d)=%d\n", - get_ref_frame_map_idx(cpi, ref_frame), - get_ref_frame_buf_idx(cpi, ref_frame), ref_frame, - cm->frame_refs[ref_frame - LAST_FRAME].map_idx, - cm->frame_refs[ref_frame - LAST_FRAME].idx, ref_frame); - } -#endif // 0 - - // Restore the frame refs info if frame_refs_short_signaling is off. 
- if (!cm->frame_refs_short_signaling) - memcpy(cm->frame_refs, frame_refs_copy, - INTER_REFS_PER_FRAME * sizeof(RefBuffer)); -} - -// New function based on HLS R18 -static void write_uncompressed_header_obu(AV1_COMP *cpi, - struct aom_write_bit_buffer *saved_wb, - struct aom_write_bit_buffer *wb) { - AV1_COMMON *const cm = &cpi->common; - const SequenceHeader *const seq_params = &cm->seq_params; - MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; - - // NOTE: By default all coded frames to be used as a reference - cm->is_reference_frame = 1; - cm->frame_type = cm->intra_only ? INTRA_ONLY_FRAME : cm->frame_type; - - if (seq_params->still_picture) { - assert(cm->show_existing_frame == 0); - assert(cm->show_frame == 1); - assert(cm->frame_type == KEY_FRAME); - } - if (!seq_params->reduced_still_picture_hdr) { - if (encode_show_existing_frame(cm)) { - RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; - const int frame_to_show = cm->ref_frame_map[cpi->existing_fb_idx_to_show]; - - if (frame_to_show < 0 || frame_bufs[frame_to_show].ref_count < 1) { - aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM, - "Buffer %d does not contain a reconstructed frame", - frame_to_show); - } - ref_cnt_fb(frame_bufs, &cm->new_fb_idx, frame_to_show); - - aom_wb_write_bit(wb, 1); // show_existing_frame - aom_wb_write_literal(wb, cpi->existing_fb_idx_to_show, 3); - - if (seq_params->decoder_model_info_present_flag && - cm->timing_info.equal_picture_interval == 0) { - write_tu_pts_info(cm, wb); - } - if (seq_params->frame_id_numbers_present_flag) { - int frame_id_len = seq_params->frame_id_length; - int display_frame_id = cm->ref_frame_id[cpi->existing_fb_idx_to_show]; - aom_wb_write_literal(wb, display_frame_id, frame_id_len); - } - - if (cm->reset_decoder_state && - frame_bufs[frame_to_show].frame_type != KEY_FRAME) { - aom_internal_error( - &cm->error, AOM_CODEC_UNSUP_BITSTREAM, - "show_existing_frame to reset state on KEY_FRAME only"); - } - - return; - } else { - 
aom_wb_write_bit(wb, 0); // show_existing_frame - } - - aom_wb_write_literal(wb, cm->frame_type, 2); - - aom_wb_write_bit(wb, cm->show_frame); - if (cm->show_frame) { - if (seq_params->decoder_model_info_present_flag && - cm->timing_info.equal_picture_interval == 0) - write_tu_pts_info(cm, wb); - } else { - aom_wb_write_bit(wb, cm->showable_frame); - } - if (frame_is_sframe(cm)) { - assert(cm->error_resilient_mode); - } else if (!(cm->frame_type == KEY_FRAME && cm->show_frame)) { - aom_wb_write_bit(wb, cm->error_resilient_mode); - } - } - aom_wb_write_bit(wb, cm->disable_cdf_update); - - if (seq_params->force_screen_content_tools == 2) { - aom_wb_write_bit(wb, cm->allow_screen_content_tools); - } else { - assert(cm->allow_screen_content_tools == - seq_params->force_screen_content_tools); - } - - if (cm->allow_screen_content_tools) { - if (seq_params->force_integer_mv == 2) { - aom_wb_write_bit(wb, cm->cur_frame_force_integer_mv); - } else { - assert(cm->cur_frame_force_integer_mv == seq_params->force_integer_mv); - } - } else { - assert(cm->cur_frame_force_integer_mv == 0); - } - - cm->invalid_delta_frame_id_minus_1 = 0; - int frame_size_override_flag = 0; - cm->frame_refs_short_signaling = 0; - - if (seq_params->reduced_still_picture_hdr) { - assert(cm->width == seq_params->max_frame_width && - cm->height == seq_params->max_frame_height); - } else { - if (seq_params->frame_id_numbers_present_flag) { - int frame_id_len = seq_params->frame_id_length; - aom_wb_write_literal(wb, cm->current_frame_id, frame_id_len); - } - - if (cm->width > seq_params->max_frame_width || - cm->height > seq_params->max_frame_height) { - aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM, - "Frame dimensions are larger than the maximum values"); - } - - frame_size_override_flag = - frame_is_sframe(cm) ? 
1 - : (cm->width != seq_params->max_frame_width || - cm->height != seq_params->max_frame_height); - if (!frame_is_sframe(cm)) aom_wb_write_bit(wb, frame_size_override_flag); - - if (seq_params->enable_order_hint) - aom_wb_write_literal(wb, cm->frame_offset, - seq_params->order_hint_bits_minus_1 + 1); - - if (!cm->error_resilient_mode && !frame_is_intra_only(cm)) { - aom_wb_write_literal(wb, cm->primary_ref_frame, PRIMARY_REF_BITS); - } - } - - if (seq_params->decoder_model_info_present_flag) { - aom_wb_write_bit(wb, cm->buffer_removal_time_present); - if (cm->buffer_removal_time_present) { - for (int op_num = 0; - op_num < seq_params->operating_points_cnt_minus_1 + 1; op_num++) { - if (cm->op_params[op_num].decoder_model_param_present_flag) { - if (((seq_params->operating_point_idc[op_num] >> - cm->temporal_layer_id) & - 0x1 && - (seq_params->operating_point_idc[op_num] >> - (cm->spatial_layer_id + 8)) & - 0x1) || - seq_params->operating_point_idc[op_num] == 0) { - aom_wb_write_unsigned_literal( - wb, cm->op_frame_timing[op_num].buffer_removal_time, - cm->buffer_model.buffer_removal_time_length); - cm->op_frame_timing[op_num].buffer_removal_time++; - if (cm->op_frame_timing[op_num].buffer_removal_time == 0) { - aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM, - "buffer_removal_time overflowed"); - } - } - } - } - } - } - cpi->refresh_frame_mask = get_refresh_mask(cpi); - if (cm->frame_type == KEY_FRAME) { - if (!cm->show_frame) { // unshown keyframe (forward keyframe) - aom_wb_write_literal(wb, cpi->refresh_frame_mask, REF_FRAMES); - } else { - assert(cpi->refresh_frame_mask == 0xFF); - } - } else { - if (cm->frame_type == INTRA_ONLY_FRAME) { - assert(cpi->refresh_frame_mask != 0xFF); - int updated_fb = -1; - for (int i = 0; i < REF_FRAMES; i++) { - // If more than one frame is refreshed, it doesn't matter which one - // we pick, so pick the first. 
- if (cpi->refresh_frame_mask & (1 << i)) { - updated_fb = i; - break; - } - } - assert(updated_fb >= 0); - cm->fb_of_context_type[cm->frame_context_idx] = updated_fb; - aom_wb_write_literal(wb, cpi->refresh_frame_mask, REF_FRAMES); - } else if (cm->frame_type == INTER_FRAME || frame_is_sframe(cm)) { - if (cm->frame_type == INTER_FRAME) { - aom_wb_write_literal(wb, cpi->refresh_frame_mask, REF_FRAMES); - } else { - assert(frame_is_sframe(cm) && cpi->refresh_frame_mask == 0xFF); - } - int updated_fb = -1; - for (int i = 0; i < REF_FRAMES; i++) { - // If more than one frame is refreshed, it doesn't matter which one - // we pick, so pick the first. - if (cpi->refresh_frame_mask & (1 << i)) { - updated_fb = i; - break; - } - } - // large scale tile sometimes won't refresh any fbs - if (updated_fb >= 0) { - cm->fb_of_context_type[cm->frame_context_idx] = updated_fb; - } - - if (!cpi->refresh_frame_mask) { - // NOTE: "cpi->refresh_frame_mask == 0" indicates that the coded frame - // will not be used as a reference - cm->is_reference_frame = 0; - } - } - } - - if (!frame_is_intra_only(cm) || cpi->refresh_frame_mask != 0xFF) { - // Write all ref frame order hints if error_resilient_mode == 1 - if (cm->error_resilient_mode && seq_params->enable_order_hint) { - RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; - for (int ref_idx = 0; ref_idx < REF_FRAMES; ref_idx++) { - // Get buffer index - const int buf_idx = cm->ref_frame_map[ref_idx]; - assert(buf_idx >= 0 && buf_idx < FRAME_BUFFERS); - - // Write order hint to bit stream - aom_wb_write_literal(wb, frame_bufs[buf_idx].cur_frame_offset, - seq_params->order_hint_bits_minus_1 + 1); - } - } - } - - if (cm->frame_type == KEY_FRAME) { - write_frame_size(cm, frame_size_override_flag, wb); - assert(!av1_superres_scaled(cm) || !cm->allow_intrabc); - if (cm->allow_screen_content_tools && !av1_superres_scaled(cm)) - aom_wb_write_bit(wb, cm->allow_intrabc); - // all eight fbs are refreshed, pick one that will live long 
enough - cm->fb_of_context_type[REGULAR_FRAME] = 0; - } else { - if (cm->frame_type == INTRA_ONLY_FRAME) { - write_frame_size(cm, frame_size_override_flag, wb); - assert(!av1_superres_scaled(cm) || !cm->allow_intrabc); - if (cm->allow_screen_content_tools && !av1_superres_scaled(cm)) - aom_wb_write_bit(wb, cm->allow_intrabc); - } else if (cm->frame_type == INTER_FRAME || frame_is_sframe(cm)) { - MV_REFERENCE_FRAME ref_frame; - - // NOTE: Error resilient mode turns off frame_refs_short_signaling - // automatically. -#define FRAME_REFS_SHORT_SIGNALING 0 -#if FRAME_REFS_SHORT_SIGNALING - cm->frame_refs_short_signaling = seq_params->enable_order_hint; -#endif // FRAME_REFS_SHORT_SIGNALING - - if (cm->frame_refs_short_signaling) { - // NOTE(zoeliu@google.com): - // An example solution for encoder-side implementation on frame refs - // short signaling, which is only turned on when the encoder side - // decision on ref frames is identical to that at the decoder side. - check_frame_refs_short_signaling(cpi); - } - - if (seq_params->enable_order_hint) - aom_wb_write_bit(wb, cm->frame_refs_short_signaling); - - if (cm->frame_refs_short_signaling) { - const int lst_ref = get_ref_frame_map_idx(cpi, LAST_FRAME); - aom_wb_write_literal(wb, lst_ref, REF_FRAMES_LOG2); - - const int gld_ref = get_ref_frame_map_idx(cpi, GOLDEN_FRAME); - aom_wb_write_literal(wb, gld_ref, REF_FRAMES_LOG2); - } - - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { - assert(get_ref_frame_map_idx(cpi, ref_frame) != INVALID_IDX); - if (!cm->frame_refs_short_signaling) - aom_wb_write_literal(wb, get_ref_frame_map_idx(cpi, ref_frame), - REF_FRAMES_LOG2); - if (seq_params->frame_id_numbers_present_flag) { - int i = get_ref_frame_map_idx(cpi, ref_frame); - int frame_id_len = seq_params->frame_id_length; - int diff_len = seq_params->delta_frame_id_length; - int delta_frame_id_minus_1 = - ((cm->current_frame_id - cm->ref_frame_id[i] + - (1 << frame_id_len)) % - (1 << frame_id_len)) - - 1; - 
if (delta_frame_id_minus_1 < 0 || - delta_frame_id_minus_1 >= (1 << diff_len)) - cm->invalid_delta_frame_id_minus_1 = 1; - aom_wb_write_literal(wb, delta_frame_id_minus_1, diff_len); - } - } - - if (!cm->error_resilient_mode && frame_size_override_flag) { - write_frame_size_with_refs(cpi, wb); - } else { - write_frame_size(cm, frame_size_override_flag, wb); - } - - if (cm->cur_frame_force_integer_mv) { - cm->allow_high_precision_mv = 0; - } else { - aom_wb_write_bit(wb, cm->allow_high_precision_mv); - } - fix_interp_filter(cm, cpi->td.counts); - write_frame_interp_filter(cm->interp_filter, wb); - aom_wb_write_bit(wb, cm->switchable_motion_mode); - if (frame_might_allow_ref_frame_mvs(cm)) { - aom_wb_write_bit(wb, cm->allow_ref_frame_mvs); - } else { - assert(cm->allow_ref_frame_mvs == 0); - } - } - } - - const int might_bwd_adapt = - !(seq_params->reduced_still_picture_hdr) && !(cm->disable_cdf_update); - if (cm->large_scale_tile) - cm->refresh_frame_context = REFRESH_FRAME_CONTEXT_DISABLED; - - if (might_bwd_adapt) { - aom_wb_write_bit( - wb, cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_DISABLED); - } - - write_tile_info(cm, saved_wb, wb); - encode_quantization(cm, wb); - encode_segmentation(cm, xd, wb); - - if (cm->delta_q_present_flag) assert(cm->base_qindex > 0); - if (cm->base_qindex > 0) { - aom_wb_write_bit(wb, cm->delta_q_present_flag); - if (cm->delta_q_present_flag) { - aom_wb_write_literal(wb, get_msb(cm->delta_q_res), 2); - xd->current_qindex = cm->base_qindex; - if (cm->allow_intrabc) - assert(cm->delta_lf_present_flag == 0); - else - aom_wb_write_bit(wb, cm->delta_lf_present_flag); - if (cm->delta_lf_present_flag) { - aom_wb_write_literal(wb, get_msb(cm->delta_lf_res), 2); - aom_wb_write_bit(wb, cm->delta_lf_multi); - av1_reset_loop_filter_delta(xd, av1_num_planes(cm)); - } - } - } - - if (cm->all_lossless) { - assert(!av1_superres_scaled(cm)); - } else { - if (!cm->coded_lossless) { - encode_loopfilter(cm, wb); - encode_cdef(cm, wb); - } - 
encode_restoration_mode(cm, wb); - } - - write_tx_mode(cm, &cm->tx_mode, wb); - - if (cpi->allow_comp_inter_inter) { - const int use_hybrid_pred = cm->reference_mode == REFERENCE_MODE_SELECT; - - aom_wb_write_bit(wb, use_hybrid_pred); - } - - if (cm->is_skip_mode_allowed) aom_wb_write_bit(wb, cm->skip_mode_flag); - - if (frame_might_allow_warped_motion(cm)) - aom_wb_write_bit(wb, cm->allow_warped_motion); - else - assert(!cm->allow_warped_motion); - - aom_wb_write_bit(wb, cm->reduced_tx_set_used); - - if (!frame_is_intra_only(cm)) write_global_motion(cpi, wb); - - if (seq_params->film_grain_params_present && - (cm->show_frame || cm->showable_frame)) { - int flip_back_update_parameters_flag = 0; - if (cm->frame_type != INTER_FRAME && - cm->film_grain_params.update_parameters == 0) { - cm->film_grain_params.update_parameters = 1; - flip_back_update_parameters_flag = 1; - } - write_film_grain_params(cpi, wb); - - if (flip_back_update_parameters_flag) - cm->film_grain_params.update_parameters = 0; - } - - if (cm->large_scale_tile) write_ext_tile_info(cm, saved_wb, wb); -} - -static int choose_size_bytes(uint32_t size, int spare_msbs) { - // Choose the number of bytes required to represent size, without - // using the 'spare_msbs' number of most significant bits. - - // Make sure we will fit in 4 bytes to start with.. 
- if (spare_msbs > 0 && size >> (32 - spare_msbs) != 0) return -1; - - // Normalise to 32 bits - size <<= spare_msbs; - - if (size >> 24 != 0) - return 4; - else if (size >> 16 != 0) - return 3; - else if (size >> 8 != 0) - return 2; - else - return 1; -} - -static void mem_put_varsize(uint8_t *const dst, const int sz, const int val) { - switch (sz) { - case 1: dst[0] = (uint8_t)(val & 0xff); break; - case 2: mem_put_le16(dst, val); break; - case 3: mem_put_le24(dst, val); break; - case 4: mem_put_le32(dst, val); break; - default: assert(0 && "Invalid size"); break; - } -} - -static int remux_tiles(const AV1_COMMON *const cm, uint8_t *dst, - const uint32_t data_size, const uint32_t max_tile_size, - const uint32_t max_tile_col_size, - int *const tile_size_bytes, - int *const tile_col_size_bytes) { - // Choose the tile size bytes (tsb) and tile column size bytes (tcsb) - int tsb; - int tcsb; - - if (cm->large_scale_tile) { - // The top bit in the tile size field indicates tile copy mode, so we - // have 1 less bit to code the tile size - tsb = choose_size_bytes(max_tile_size, 1); - tcsb = choose_size_bytes(max_tile_col_size, 0); - } else { - tsb = choose_size_bytes(max_tile_size, 0); - tcsb = 4; // This is ignored - (void)max_tile_col_size; - } - - assert(tsb > 0); - assert(tcsb > 0); - - *tile_size_bytes = tsb; - *tile_col_size_bytes = tcsb; - if (tsb == 4 && tcsb == 4) return data_size; - - uint32_t wpos = 0; - uint32_t rpos = 0; - - if (cm->large_scale_tile) { - int tile_row; - int tile_col; - - for (tile_col = 0; tile_col < cm->tile_cols; tile_col++) { - // All but the last column has a column header - if (tile_col < cm->tile_cols - 1) { - uint32_t tile_col_size = mem_get_le32(dst + rpos); - rpos += 4; - - // Adjust the tile column size by the number of bytes removed - // from the tile size fields. 
- tile_col_size -= (4 - tsb) * cm->tile_rows; - - mem_put_varsize(dst + wpos, tcsb, tile_col_size); - wpos += tcsb; - } - - for (tile_row = 0; tile_row < cm->tile_rows; tile_row++) { - // All, including the last row has a header - uint32_t tile_header = mem_get_le32(dst + rpos); - rpos += 4; - - // If this is a copy tile, we need to shift the MSB to the - // top bit of the new width, and there is no data to copy. - if (tile_header >> 31 != 0) { - if (tsb < 4) tile_header >>= 32 - 8 * tsb; - mem_put_varsize(dst + wpos, tsb, tile_header); - wpos += tsb; - } else { - mem_put_varsize(dst + wpos, tsb, tile_header); - wpos += tsb; - - tile_header += AV1_MIN_TILE_SIZE_BYTES; - memmove(dst + wpos, dst + rpos, tile_header); - rpos += tile_header; - wpos += tile_header; - } - } - } - - assert(rpos > wpos); - assert(rpos == data_size); - - return wpos; - } - const int n_tiles = cm->tile_cols * cm->tile_rows; - int n; - - for (n = 0; n < n_tiles; n++) { - int tile_size; - - if (n == n_tiles - 1) { - tile_size = data_size - rpos; - } else { - tile_size = mem_get_le32(dst + rpos); - rpos += 4; - mem_put_varsize(dst + wpos, tsb, tile_size); - tile_size += AV1_MIN_TILE_SIZE_BYTES; - wpos += tsb; - } - - memmove(dst + wpos, dst + rpos, tile_size); - - rpos += tile_size; - wpos += tile_size; - } - - assert(rpos > wpos); - assert(rpos == data_size); - - return wpos; -} - -uint32_t write_obu_header(OBU_TYPE obu_type, int obu_extension, - uint8_t *const dst) { - struct aom_write_bit_buffer wb = { dst, 0 }; - uint32_t size = 0; - - aom_wb_write_literal(&wb, 0, 1); // forbidden bit. - aom_wb_write_literal(&wb, (int)obu_type, 4); - aom_wb_write_literal(&wb, obu_extension ? 
1 : 0, 1); - aom_wb_write_literal(&wb, 1, 1); // obu_has_payload_length_field - aom_wb_write_literal(&wb, 0, 1); // reserved - - if (obu_extension) { - aom_wb_write_literal(&wb, obu_extension & 0xFF, 8); - } - - size = aom_wb_bytes_written(&wb); - return size; -} - -int write_uleb_obu_size(uint32_t obu_header_size, uint32_t obu_payload_size, - uint8_t *dest) { - const uint32_t obu_size = obu_payload_size; - const uint32_t offset = obu_header_size; - size_t coded_obu_size = 0; - - if (aom_uleb_encode(obu_size, sizeof(obu_size), dest + offset, - &coded_obu_size) != 0) { - return AOM_CODEC_ERROR; - } - - return AOM_CODEC_OK; -} - -static size_t obu_memmove(uint32_t obu_header_size, uint32_t obu_payload_size, - uint8_t *data) { - const size_t length_field_size = aom_uleb_size_in_bytes(obu_payload_size); - const uint32_t move_dst_offset = - (uint32_t)length_field_size + obu_header_size; - const uint32_t move_src_offset = obu_header_size; - const uint32_t move_size = obu_payload_size; - memmove(data + move_dst_offset, data + move_src_offset, move_size); - return length_field_size; -} - -static void add_trailing_bits(struct aom_write_bit_buffer *wb) { - if (aom_wb_is_byte_aligned(wb)) { - aom_wb_write_literal(wb, 0x80, 8); - } else { - // assumes that the other bits are already 0s - aom_wb_write_bit(wb, 1); - } -} - -static void write_bitstream_level(BitstreamLevel bl, - struct aom_write_bit_buffer *wb) { - uint8_t seq_level_idx = major_minor_to_seq_level_idx(bl); - assert(is_valid_seq_level_idx(seq_level_idx)); - aom_wb_write_literal(wb, seq_level_idx, LEVEL_BITS); -} - -uint32_t write_sequence_header_obu(AV1_COMP *cpi, uint8_t *const dst) { - AV1_COMMON *const cm = &cpi->common; - struct aom_write_bit_buffer wb = { dst, 0 }; - uint32_t size = 0; - - write_profile(cm->seq_params.profile, &wb); - - // Still picture or not - aom_wb_write_bit(&wb, cm->seq_params.still_picture); - assert(IMPLIES(!cm->seq_params.still_picture, - !cm->seq_params.reduced_still_picture_hdr)); - 
// whether to use reduced still picture header - aom_wb_write_bit(&wb, cm->seq_params.reduced_still_picture_hdr); - - if (cm->seq_params.reduced_still_picture_hdr) { - assert(cm->timing_info_present == 0); - assert(cm->seq_params.decoder_model_info_present_flag == 0); - assert(cm->seq_params.display_model_info_present_flag == 0); - write_bitstream_level(cm->seq_params.level[0], &wb); - } else { - aom_wb_write_bit(&wb, cm->timing_info_present); // timing info present flag - - if (cm->timing_info_present) { - // timing_info - write_timing_info_header(cm, &wb); - aom_wb_write_bit(&wb, cm->seq_params.decoder_model_info_present_flag); - if (cm->seq_params.decoder_model_info_present_flag) { - write_decoder_model_info(cm, &wb); - } - } - aom_wb_write_bit(&wb, cm->seq_params.display_model_info_present_flag); - aom_wb_write_literal(&wb, cm->seq_params.operating_points_cnt_minus_1, - OP_POINTS_CNT_MINUS_1_BITS); - int i; - for (i = 0; i < cm->seq_params.operating_points_cnt_minus_1 + 1; i++) { - aom_wb_write_literal(&wb, cm->seq_params.operating_point_idc[i], - OP_POINTS_IDC_BITS); - write_bitstream_level(cm->seq_params.level[i], &wb); - if (cm->seq_params.level[i].major > 3) - aom_wb_write_bit(&wb, cm->seq_params.tier[i]); - if (cm->seq_params.decoder_model_info_present_flag) { - aom_wb_write_bit(&wb, - cm->op_params[i].decoder_model_param_present_flag); - if (cm->op_params[i].decoder_model_param_present_flag) - write_dec_model_op_parameters(cm, &wb, i); - } - if (cm->seq_params.display_model_info_present_flag) { - aom_wb_write_bit(&wb, - cm->op_params[i].display_model_param_present_flag); - if (cm->op_params[i].display_model_param_present_flag) { - assert(cm->op_params[i].initial_display_delay <= 10); - aom_wb_write_literal(&wb, cm->op_params[i].initial_display_delay - 1, - 4); - } - } - } - } - write_sequence_header(cpi, &wb); - - write_color_config(&cm->seq_params, &wb); - - aom_wb_write_bit(&wb, cm->seq_params.film_grain_params_present); - - add_trailing_bits(&wb); - - 
size = aom_wb_bytes_written(&wb); - return size; -} - -static uint32_t write_frame_header_obu(AV1_COMP *cpi, - struct aom_write_bit_buffer *saved_wb, - uint8_t *const dst, - int append_trailing_bits) { - struct aom_write_bit_buffer wb = { dst, 0 }; - write_uncompressed_header_obu(cpi, saved_wb, &wb); - if (append_trailing_bits) add_trailing_bits(&wb); - return aom_wb_bytes_written(&wb); -} - -static uint32_t write_tile_group_header(uint8_t *const dst, int startTile, - int endTile, int tiles_log2, - int tile_start_and_end_present_flag) { - struct aom_write_bit_buffer wb = { dst, 0 }; - uint32_t size = 0; - - if (!tiles_log2) return size; - - aom_wb_write_bit(&wb, tile_start_and_end_present_flag); - - if (tile_start_and_end_present_flag) { - aom_wb_write_literal(&wb, startTile, tiles_log2); - aom_wb_write_literal(&wb, endTile, tiles_log2); - } - - size = aom_wb_bytes_written(&wb); - return size; -} - -typedef struct { - uint8_t *frame_header; - size_t obu_header_byte_offset; - size_t total_length; -} FrameHeaderInfo; - -static uint32_t write_tiles_in_tg_obus(AV1_COMP *const cpi, uint8_t *const dst, - struct aom_write_bit_buffer *saved_wb, - uint8_t obu_extension_header, - const FrameHeaderInfo *fh_info) { - AV1_COMMON *const cm = &cpi->common; - aom_writer mode_bc; - int tile_row, tile_col; - TileBufferEnc(*const tile_buffers)[MAX_TILE_COLS] = cpi->tile_buffers; - uint32_t total_size = 0; - const int tile_cols = cm->tile_cols; - const int tile_rows = cm->tile_rows; - unsigned int tile_size = 0; - unsigned int max_tile_size = 0; - unsigned int max_tile_col_size = 0; - const int n_log2_tiles = cm->log2_tile_rows + cm->log2_tile_cols; - // Fixed size tile groups for the moment - const int num_tg_hdrs = cm->num_tg; - const int tg_size = - (cm->large_scale_tile) - ? 
1 - : (tile_rows * tile_cols + num_tg_hdrs - 1) / num_tg_hdrs; - int tile_count = 0; - int curr_tg_data_size = 0; - uint8_t *data = dst; - int new_tg = 1; - const int have_tiles = tile_cols * tile_rows > 1; - int first_tg = 1; - - cm->largest_tile_id = 0; - - if (cm->large_scale_tile) { - // For large_scale_tile case, we always have only one tile group, so it can - // be written as an OBU_FRAME. - const OBU_TYPE obu_type = OBU_FRAME; - const uint32_t tg_hdr_size = write_obu_header(obu_type, 0, data); - data += tg_hdr_size; - - const uint32_t frame_header_size = - write_frame_header_obu(cpi, saved_wb, data, 0); - data += frame_header_size; - total_size += frame_header_size; - -#define EXT_TILE_DEBUG 0 -#if EXT_TILE_DEBUG - { - char fn[20] = "./fh"; - fn[4] = cm->current_video_frame / 100 + '0'; - fn[5] = (cm->current_video_frame % 100) / 10 + '0'; - fn[6] = (cm->current_video_frame % 10) + '0'; - fn[7] = '\0'; - av1_print_uncompressed_frame_header(data - frame_header_size, - frame_header_size, fn); - } -#endif // EXT_TILE_DEBUG -#undef EXT_TILE_DEBUG - - int tile_size_bytes = 0; - int tile_col_size_bytes = 0; - - for (tile_col = 0; tile_col < tile_cols; tile_col++) { - TileInfo tile_info; - const int is_last_col = (tile_col == tile_cols - 1); - const uint32_t col_offset = total_size; - - av1_tile_set_col(&tile_info, cm, tile_col); - - // The last column does not have a column header - if (!is_last_col) total_size += 4; - - for (tile_row = 0; tile_row < tile_rows; tile_row++) { - TileBufferEnc *const buf = &tile_buffers[tile_row][tile_col]; - const int data_offset = have_tiles ? 4 : 0; - const int tile_idx = tile_row * tile_cols + tile_col; - TileDataEnc *this_tile = &cpi->tile_data[tile_idx]; - av1_tile_set_row(&tile_info, cm, tile_row); - - buf->data = dst + total_size + tg_hdr_size; - - // Is CONFIG_EXT_TILE = 1, every tile in the row has a header, - // even for the last one, unless no tiling is used at all. 
- total_size += data_offset; - // Initialise tile context from the frame context - this_tile->tctx = *cm->fc; - cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx; - mode_bc.allow_update_cdf = !cm->large_scale_tile; - mode_bc.allow_update_cdf = - mode_bc.allow_update_cdf && !cm->disable_cdf_update; - aom_start_encode(&mode_bc, buf->data + data_offset); - write_modes(cpi, &tile_info, &mode_bc, tile_row, tile_col); - aom_stop_encode(&mode_bc); - tile_size = mode_bc.pos; - buf->size = tile_size; - - // Record the maximum tile size we see, so we can compact headers later. - if (tile_size > max_tile_size) { - max_tile_size = tile_size; - cm->largest_tile_id = tile_cols * tile_row + tile_col; - } - - if (have_tiles) { - // tile header: size of this tile, or copy offset - uint32_t tile_header = tile_size - AV1_MIN_TILE_SIZE_BYTES; - const int tile_copy_mode = - ((AOMMAX(cm->tile_width, cm->tile_height) << MI_SIZE_LOG2) <= 256) - ? 1 - : 0; - - // If tile_copy_mode = 1, check if this tile is a copy tile. - // Very low chances to have copy tiles on the key frames, so don't - // search on key frames to reduce unnecessary search. - if (cm->frame_type != KEY_FRAME && tile_copy_mode) { - const int identical_tile_offset = - find_identical_tile(tile_row, tile_col, tile_buffers); - - if (identical_tile_offset > 0) { - tile_size = 0; - tile_header = identical_tile_offset | 0x80; - tile_header <<= 24; - } - } - - mem_put_le32(buf->data, tile_header); - } - - total_size += tile_size; - } - - if (!is_last_col) { - uint32_t col_size = total_size - col_offset - 4; - mem_put_le32(dst + col_offset + tg_hdr_size, col_size); - - // Record the maximum tile column size we see. - max_tile_col_size = AOMMAX(max_tile_col_size, col_size); - } - } - - if (have_tiles) { - total_size = remux_tiles(cm, data, total_size - frame_header_size, - max_tile_size, max_tile_col_size, - &tile_size_bytes, &tile_col_size_bytes); - total_size += frame_header_size; - } - - // In EXT_TILE case, only use 1 tile group. 
Follow the obu syntax, write - // current tile group size before tile data(include tile column header). - // Tile group size doesn't include the bytes storing tg size. - total_size += tg_hdr_size; - const uint32_t obu_payload_size = total_size - tg_hdr_size; - const size_t length_field_size = - obu_memmove(tg_hdr_size, obu_payload_size, dst); - if (write_uleb_obu_size(tg_hdr_size, obu_payload_size, dst) != - AOM_CODEC_OK) { - assert(0); - } - total_size += (uint32_t)length_field_size; - saved_wb->bit_buffer += length_field_size; - - // Now fill in the gaps in the uncompressed header. - if (have_tiles) { - assert(tile_col_size_bytes >= 1 && tile_col_size_bytes <= 4); - aom_wb_overwrite_literal(saved_wb, tile_col_size_bytes - 1, 2); - - assert(tile_size_bytes >= 1 && tile_size_bytes <= 4); - aom_wb_overwrite_literal(saved_wb, tile_size_bytes - 1, 2); - } - return total_size; - } - - uint32_t obu_header_size = 0; - uint8_t *tile_data_start = dst + total_size; - for (tile_row = 0; tile_row < tile_rows; tile_row++) { - TileInfo tile_info; - av1_tile_set_row(&tile_info, cm, tile_row); - - for (tile_col = 0; tile_col < tile_cols; tile_col++) { - const int tile_idx = tile_row * tile_cols + tile_col; - TileBufferEnc *const buf = &tile_buffers[tile_row][tile_col]; - TileDataEnc *this_tile = &cpi->tile_data[tile_idx]; - int is_last_tile_in_tg = 0; - - if (new_tg) { - data = dst + total_size; - - // A new tile group begins at this tile. Write the obu header and - // tile group header - const OBU_TYPE obu_type = - (num_tg_hdrs == 1) ? 
OBU_FRAME : OBU_TILE_GROUP; - curr_tg_data_size = - write_obu_header(obu_type, obu_extension_header, data); - obu_header_size = curr_tg_data_size; - - if (num_tg_hdrs == 1) { - curr_tg_data_size += write_frame_header_obu( - cpi, saved_wb, data + curr_tg_data_size, 0); - } - curr_tg_data_size += write_tile_group_header( - data + curr_tg_data_size, tile_idx, - AOMMIN(tile_idx + tg_size - 1, tile_cols * tile_rows - 1), - n_log2_tiles, cm->num_tg > 1); - total_size += curr_tg_data_size; - tile_data_start += curr_tg_data_size; - new_tg = 0; - tile_count = 0; - } - tile_count++; - av1_tile_set_col(&tile_info, cm, tile_col); - - if (tile_count == tg_size || tile_idx == (tile_cols * tile_rows - 1)) { - is_last_tile_in_tg = 1; - new_tg = 1; - } else { - is_last_tile_in_tg = 0; - } - - buf->data = dst + total_size; - - // The last tile of the tile group does not have a header. - if (!is_last_tile_in_tg) total_size += 4; - - // Initialise tile context from the frame context - this_tile->tctx = *cm->fc; - cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx; - mode_bc.allow_update_cdf = 1; - mode_bc.allow_update_cdf = - mode_bc.allow_update_cdf && !cm->disable_cdf_update; - const int num_planes = av1_num_planes(cm); - av1_reset_loop_restoration(&cpi->td.mb.e_mbd, num_planes); - - aom_start_encode(&mode_bc, dst + total_size); - write_modes(cpi, &tile_info, &mode_bc, tile_row, tile_col); - aom_stop_encode(&mode_bc); - tile_size = mode_bc.pos; - assert(tile_size >= AV1_MIN_TILE_SIZE_BYTES); - - curr_tg_data_size += (tile_size + (is_last_tile_in_tg ? 
0 : 4)); - buf->size = tile_size; - if (tile_size > max_tile_size) { - cm->largest_tile_id = tile_cols * tile_row + tile_col; - max_tile_size = tile_size; - } - - if (!is_last_tile_in_tg) { - // size of this tile - mem_put_le32(buf->data, tile_size - AV1_MIN_TILE_SIZE_BYTES); - } else { - // write current tile group size - const uint32_t obu_payload_size = curr_tg_data_size - obu_header_size; - const size_t length_field_size = - obu_memmove(obu_header_size, obu_payload_size, data); - if (write_uleb_obu_size(obu_header_size, obu_payload_size, data) != - AOM_CODEC_OK) { - assert(0); - } - curr_tg_data_size += (int)length_field_size; - total_size += (uint32_t)length_field_size; - tile_data_start += length_field_size; - if (num_tg_hdrs == 1) { - // if this tg is combined with the frame header then update saved - // frame header base offset accroding to length field size - saved_wb->bit_buffer += length_field_size; - } - - if (!first_tg && cm->error_resilient_mode) { - // Make room for a duplicate Frame Header OBU. - memmove(data + fh_info->total_length, data, curr_tg_data_size); - - // Insert a copy of the Frame Header OBU. - memcpy(data, fh_info->frame_header, fh_info->total_length); - - // Force context update tile to be the first tile in error - // resiliant mode as the duplicate frame headers will have - // context_update_tile_id set to 0 - cm->largest_tile_id = 0; - - // Rewrite the OBU header to change the OBU type to Redundant Frame - // Header. - write_obu_header(OBU_REDUNDANT_FRAME_HEADER, obu_extension_header, - &data[fh_info->obu_header_byte_offset]); - - data += fh_info->total_length; - - curr_tg_data_size += (int)(fh_info->total_length); - total_size += (uint32_t)(fh_info->total_length); - } - first_tg = 0; - } - - total_size += tile_size; - } - } - - if (have_tiles) { - // Fill in context_update_tile_id indicating the tile to use for the - // cdf update. 
The encoder currently sets it to the largest tile - // (but is up to the encoder) - aom_wb_overwrite_literal(saved_wb, cm->largest_tile_id, - cm->log2_tile_cols + cm->log2_tile_rows); - // If more than one tile group. tile_size_bytes takes the default value 4 - // and does not need to be set. For a single tile group it is set in the - // section below. - if (num_tg_hdrs == 1) { - int tile_size_bytes = 4, unused; - const uint32_t tile_data_offset = (uint32_t)(tile_data_start - dst); - const uint32_t tile_data_size = total_size - tile_data_offset; - - total_size = - remux_tiles(cm, tile_data_start, tile_data_size, max_tile_size, - max_tile_col_size, &tile_size_bytes, &unused); - total_size += tile_data_offset; - assert(tile_size_bytes >= 1 && tile_size_bytes <= 4); - - aom_wb_overwrite_literal(saved_wb, tile_size_bytes - 1, 2); - - // Update the OBU length if remux_tiles() reduced the size. - uint64_t payload_size; - size_t length_field_size; - int res = - aom_uleb_decode(dst + obu_header_size, total_size - obu_header_size, - &payload_size, &length_field_size); - assert(res == 0); - (void)res; - - const uint64_t new_payload_size = - total_size - obu_header_size - length_field_size; - if (new_payload_size != payload_size) { - size_t new_length_field_size; - res = aom_uleb_encode(new_payload_size, length_field_size, - dst + obu_header_size, &new_length_field_size); - assert(res == 0); - if (new_length_field_size < length_field_size) { - const size_t src_offset = obu_header_size + length_field_size; - const size_t dst_offset = obu_header_size + new_length_field_size; - memmove(dst + dst_offset, dst + src_offset, (size_t)payload_size); - total_size -= (int)(length_field_size - new_length_field_size); - } - } - } - } - return total_size; -} - -int av1_pack_bitstream(AV1_COMP *const cpi, uint8_t *dst, size_t *size) { - uint8_t *data = dst; - uint32_t data_size; - AV1_COMMON *const cm = &cpi->common; - uint32_t obu_header_size = 0; - uint32_t obu_payload_size = 0; - 
FrameHeaderInfo fh_info = { NULL, 0, 0 }; - const uint8_t obu_extension_header = - cm->temporal_layer_id << 5 | cm->spatial_layer_id << 3 | 0; - -#if CONFIG_BITSTREAM_DEBUG - bitstream_queue_reset_write(); -#endif - - // The TD is now written outside the frame encode loop - - // write sequence header obu if KEY_FRAME, preceded by 4-byte size - if (cm->frame_type == KEY_FRAME && cm->show_frame) { - obu_header_size = write_obu_header(OBU_SEQUENCE_HEADER, 0, data); - - obu_payload_size = write_sequence_header_obu(cpi, data + obu_header_size); - const size_t length_field_size = - obu_memmove(obu_header_size, obu_payload_size, data); - if (write_uleb_obu_size(obu_header_size, obu_payload_size, data) != - AOM_CODEC_OK) { - return AOM_CODEC_ERROR; - } - - data += obu_header_size + obu_payload_size + length_field_size; - } - - const int write_frame_header = - (cm->num_tg > 1 || encode_show_existing_frame(cm)); - struct aom_write_bit_buffer saved_wb; - if (write_frame_header) { - // Write Frame Header OBU. - fh_info.frame_header = data; - obu_header_size = - write_obu_header(OBU_FRAME_HEADER, obu_extension_header, data); - obu_payload_size = - write_frame_header_obu(cpi, &saved_wb, data + obu_header_size, 1); - - const size_t length_field_size = - obu_memmove(obu_header_size, obu_payload_size, data); - if (write_uleb_obu_size(obu_header_size, obu_payload_size, data) != - AOM_CODEC_OK) { - return AOM_CODEC_ERROR; - } - - fh_info.obu_header_byte_offset = 0; - fh_info.total_length = - obu_header_size + obu_payload_size + length_field_size; - data += fh_info.total_length; - - // Since length_field_size is determined adaptively after frame header - // encoding, saved_wb must be adjusted accordingly. 
- saved_wb.bit_buffer += length_field_size; - } - - if (encode_show_existing_frame(cm)) { - data_size = 0; - } else { - // Each tile group obu will be preceded by 4-byte size of the tile group - // obu - data_size = write_tiles_in_tg_obus(cpi, data, &saved_wb, - obu_extension_header, &fh_info); - } - data += data_size; - *size = data - dst; - return AOM_CODEC_OK; -} diff --git a/third_party/aom/av1/encoder/bitstream.h b/third_party/aom/av1/encoder/bitstream.h deleted file mode 100644 index 465ccaed5..000000000 --- a/third_party/aom/av1/encoder/bitstream.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_ENCODER_BITSTREAM_H_ -#define AOM_AV1_ENCODER_BITSTREAM_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "av1/encoder/encoder.h" - -struct aom_write_bit_buffer; - -// Writes only the OBU Sequence Header payload, and returns the size of the -// payload written to 'dst'. This function does not write the OBU header, the -// optional extension, or the OBU size to 'dst'. -uint32_t write_sequence_header_obu(AV1_COMP *cpi, uint8_t *const dst); - -// Writes the OBU header byte, and the OBU header extension byte when -// 'obu_extension' is non-zero. Returns number of bytes written to 'dst'. 
-uint32_t write_obu_header(OBU_TYPE obu_type, int obu_extension, - uint8_t *const dst); - -int write_uleb_obu_size(uint32_t obu_header_size, uint32_t obu_payload_size, - uint8_t *dest); - -int av1_pack_bitstream(AV1_COMP *const cpi, uint8_t *dest, size_t *size); - -static INLINE int av1_preserve_existing_gf(AV1_COMP *cpi) { - // Do not swap gf and arf indices for internal overlay frames - return cpi->rc.is_src_frame_alt_ref && !cpi->rc.is_src_frame_ext_arf; -} - -void av1_write_tx_type(const AV1_COMMON *const cm, const MACROBLOCKD *xd, - int blk_row, int blk_col, int plane, TX_SIZE tx_size, - aom_writer *w); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_BITSTREAM_H_ diff --git a/third_party/aom/av1/encoder/block.h b/third_party/aom/av1/encoder/block.h deleted file mode 100644 index 0bc5dea82..000000000 --- a/third_party/aom/av1/encoder/block.h +++ /dev/null @@ -1,452 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_ENCODER_BLOCK_H_ -#define AOM_AV1_ENCODER_BLOCK_H_ - -#include "av1/common/entropymv.h" -#include "av1/common/entropy.h" -#include "av1/common/mvref_common.h" -#include "av1/encoder/hash.h" -#if CONFIG_DIST_8X8 -#include "aom/aomcx.h" -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - unsigned int sse; - int sum; - unsigned int var; -} DIFF; - -typedef struct macroblock_plane { - DECLARE_ALIGNED(16, int16_t, src_diff[MAX_SB_SQUARE]); - tran_low_t *qcoeff; - tran_low_t *coeff; - uint16_t *eobs; - uint8_t *txb_entropy_ctx; - struct buf_2d src; - - // Quantizer setings - // These are used/accessed only in the quantization process - // RDO does not / must not depend on any of these values - // All values below share the coefficient scale/shift used in TX - const int16_t *quant_fp_QTX; - const int16_t *round_fp_QTX; - const int16_t *quant_QTX; - const int16_t *quant_shift_QTX; - const int16_t *zbin_QTX; - const int16_t *round_QTX; - const int16_t *dequant_QTX; -} MACROBLOCK_PLANE; - -typedef struct { - int txb_skip_cost[TXB_SKIP_CONTEXTS][2]; - int base_eob_cost[SIG_COEF_CONTEXTS_EOB][3]; - int base_cost[SIG_COEF_CONTEXTS][4]; - int eob_extra_cost[EOB_COEF_CONTEXTS][2]; - int dc_sign_cost[DC_SIGN_CONTEXTS][2]; - int lps_cost[LEVEL_CONTEXTS][COEFF_BASE_RANGE + 1]; -} LV_MAP_COEFF_COST; - -typedef struct { - int eob_cost[2][11]; -} LV_MAP_EOB_COST; - -typedef struct { - tran_low_t tcoeff[MAX_MB_PLANE][MAX_SB_SQUARE]; - uint16_t eobs[MAX_MB_PLANE][MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)]; - uint8_t txb_skip_ctx[MAX_MB_PLANE] - [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)]; - int dc_sign_ctx[MAX_MB_PLANE] - [MAX_SB_SQUARE / (TX_SIZE_W_MIN * TX_SIZE_H_MIN)]; -} CB_COEFF_BUFFER; - -typedef struct { - int16_t mode_context[MODE_CTX_REF_FRAMES]; - // TODO(angiebird): Reduce the buffer size according to sb_type - tran_low_t *tcoeff[MAX_MB_PLANE]; - uint16_t *eobs[MAX_MB_PLANE]; - uint8_t *txb_skip_ctx[MAX_MB_PLANE]; - int 
*dc_sign_ctx[MAX_MB_PLANE]; - uint8_t ref_mv_count[MODE_CTX_REF_FRAMES]; - CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE]; - int_mv global_mvs[REF_FRAMES]; - int16_t compound_mode_context[MODE_CTX_REF_FRAMES]; -} MB_MODE_INFO_EXT; - -typedef struct { - int col_min; - int col_max; - int row_min; - int row_max; -} MvLimits; - -typedef struct { - uint8_t best_palette_color_map[MAX_PALETTE_SQUARE]; - int kmeans_data_buf[2 * MAX_PALETTE_SQUARE]; -} PALETTE_BUFFER; - -typedef struct { - TX_SIZE tx_size; - TX_SIZE inter_tx_size[INTER_TX_SIZE_BUF_LEN]; - uint8_t blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE]; - TX_TYPE txk_type[TXK_TYPE_BUF_LEN]; - RD_STATS rd_stats; - uint32_t hash_value; -} MB_RD_INFO; - -#define RD_RECORD_BUFFER_LEN 8 -typedef struct { - MB_RD_INFO tx_rd_info[RD_RECORD_BUFFER_LEN]; // Circular buffer. - int index_start; - int num; - CRC32C crc_calculator; // Hash function. -} MB_RD_RECORD; - -typedef struct { - int64_t dist; - int64_t sse; - int rate; - uint16_t eob; - TX_TYPE tx_type; - uint16_t entropy_context; - uint8_t txb_entropy_ctx; - uint8_t valid; - uint8_t fast; // This is not being used now. -} TXB_RD_INFO; - -#define TX_SIZE_RD_RECORD_BUFFER_LEN 256 -typedef struct { - uint32_t hash_vals[TX_SIZE_RD_RECORD_BUFFER_LEN]; - TXB_RD_INFO tx_rd_info[TX_SIZE_RD_RECORD_BUFFER_LEN]; - int index_start; - int num; -} TXB_RD_RECORD; - -typedef struct tx_size_rd_info_node { - TXB_RD_INFO *rd_info_array; // Points to array of size TX_TYPES. - struct tx_size_rd_info_node *children[4]; -} TXB_RD_INFO_NODE; - -// Region size for mode decision sampling in the first pass of partition -// search(two_pass_partition_search speed feature), in units of mi size(4). -// Used by the mode_pruning_based_on_two_pass_partition_search speed feature. 
-#define FIRST_PARTITION_PASS_SAMPLE_REGION 8 -#define FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2 3 -#define FIRST_PARTITION_PASS_STATS_TABLES \ - (MAX_MIB_SIZE >> FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2) * \ - (MAX_MIB_SIZE >> FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2) -#define FIRST_PARTITION_PASS_STATS_STRIDE \ - (MAX_MIB_SIZE_LOG2 - FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2) - -static INLINE int av1_first_partition_pass_stats_index(int mi_row, int mi_col) { - const int row = - (mi_row & MAX_MIB_MASK) >> FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2; - const int col = - (mi_col & MAX_MIB_MASK) >> FIRST_PARTITION_PASS_SAMPLE_REGION_LOG2; - return (row << FIRST_PARTITION_PASS_STATS_STRIDE) + col; -} - -typedef struct { - uint8_t ref0_counts[REF_FRAMES]; // Counters for ref_frame[0]. - uint8_t ref1_counts[REF_FRAMES]; // Counters for ref_frame[1]. - int sample_counts; // Number of samples collected. -} FIRST_PARTITION_PASS_STATS; - -#define MAX_INTERP_FILTER_STATS 64 -typedef struct { - InterpFilters filters; - int_mv mv[2]; - int8_t ref_frames[2]; - COMPOUND_TYPE comp_type; -} INTERPOLATION_FILTER_STATS; - -typedef struct macroblock MACROBLOCK; -struct macroblock { - struct macroblock_plane plane[MAX_MB_PLANE]; - - // Determine if one would go with reduced complexity transform block - // search model to select prediction modes, or full complexity model - // to select transform kernel. - int rd_model; - - // Indicate if the encoder is running in the first pass partition search. - // In that case, apply certain speed features therein to reduce the overhead - // cost in the first pass search. - int cb_partition_scan; - - FIRST_PARTITION_PASS_STATS - first_partition_pass_stats[FIRST_PARTITION_PASS_STATS_TABLES]; - - // [comp_idx][saved stat_idx] - INTERPOLATION_FILTER_STATS interp_filter_stats[2][MAX_INTERP_FILTER_STATS]; - int interp_filter_stats_idx[2]; - - // Activate constrained coding block partition search range. 
- int use_cb_search_range; - - // Inter macroblock RD search info. - MB_RD_RECORD mb_rd_record; - - // Inter transform block RD search info. for square TX sizes. - TXB_RD_RECORD txb_rd_record_8X8[(MAX_MIB_SIZE >> 1) * (MAX_MIB_SIZE >> 1)]; - TXB_RD_RECORD txb_rd_record_16X16[(MAX_MIB_SIZE >> 2) * (MAX_MIB_SIZE >> 2)]; - TXB_RD_RECORD txb_rd_record_32X32[(MAX_MIB_SIZE >> 3) * (MAX_MIB_SIZE >> 3)]; - TXB_RD_RECORD txb_rd_record_64X64[(MAX_MIB_SIZE >> 4) * (MAX_MIB_SIZE >> 4)]; - - // Intra transform block RD search info. for square TX sizes. - TXB_RD_RECORD txb_rd_record_intra; - - MACROBLOCKD e_mbd; - MB_MODE_INFO_EXT *mbmi_ext; - int skip_block; - int qindex; - - // The equivalent error at the current rdmult of one whole bit (not one - // bitcost unit). - int errorperbit; - // The equivalend SAD error of one (whole) bit at the current quantizer - // for large blocks. - int sadperbit16; - // The equivalend SAD error of one (whole) bit at the current quantizer - // for sub-8x8 blocks. - int sadperbit4; - int rdmult; - int mb_energy; - int sb_energy_level; - int *m_search_count_ptr; - int *ex_search_count_ptr; - - unsigned int txb_split_count; - - // These are set to their default values at the beginning, and then adjusted - // further in the encoding process. 
- BLOCK_SIZE min_partition_size; - BLOCK_SIZE max_partition_size; - - unsigned int max_mv_context[REF_FRAMES]; - unsigned int source_variance; - unsigned int pred_sse[REF_FRAMES]; - int pred_mv_sad[REF_FRAMES]; - - int *nmvjointcost; - int nmv_vec_cost[MV_JOINTS]; - int *nmvcost[2]; - int *nmvcost_hp[2]; - int **mv_cost_stack; - int **mvcost; - - int32_t *wsrc_buf; - int32_t *mask_buf; - uint8_t *above_pred_buf; - uint8_t *left_pred_buf; - - PALETTE_BUFFER *palette_buffer; - - CONV_BUF_TYPE *tmp_conv_dst; - uint8_t *tmp_obmc_bufs[2]; - - // buffer for hash value calculation of a block - // used only in av1_get_block_hash_value() - // [first hash/second hash] - // [two buffers used ping-pong] - uint32_t *hash_value_buffer[2][2]; - - CRC_CALCULATOR crc_calculator1; - CRC_CALCULATOR crc_calculator2; - int g_crc_initialized; - - // These define limits to motion vector components to prevent them - // from extending outside the UMV borders - MvLimits mv_limits; - - uint8_t blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE]; - - int skip; - int skip_chroma_rd; - int skip_cost[SKIP_CONTEXTS][2]; - - int skip_mode; // 0: off; 1: on - int skip_mode_cost[SKIP_CONTEXTS][2]; - - int compound_idx; - - LV_MAP_COEFF_COST coeff_costs[TX_SIZES][PLANE_TYPES]; - LV_MAP_EOB_COST eob_costs[7][2]; - uint16_t cb_offset; - - // mode costs - int intra_inter_cost[INTRA_INTER_CONTEXTS][2]; - - int mbmode_cost[BLOCK_SIZE_GROUPS][INTRA_MODES]; - int newmv_mode_cost[NEWMV_MODE_CONTEXTS][2]; - int zeromv_mode_cost[GLOBALMV_MODE_CONTEXTS][2]; - int refmv_mode_cost[REFMV_MODE_CONTEXTS][2]; - int drl_mode_cost0[DRL_MODE_CONTEXTS][2]; - - int comp_inter_cost[COMP_INTER_CONTEXTS][2]; - int single_ref_cost[REF_CONTEXTS][SINGLE_REFS - 1][2]; - int comp_ref_type_cost[COMP_REF_TYPE_CONTEXTS] - [CDF_SIZE(COMP_REFERENCE_TYPES)]; - int uni_comp_ref_cost[UNI_COMP_REF_CONTEXTS][UNIDIR_COMP_REFS - 1] - [CDF_SIZE(2)]; - // Cost for signaling ref_frame[0] (LAST_FRAME, LAST2_FRAME, LAST3_FRAME or - // GOLDEN_FRAME) in 
bidir-comp mode. - int comp_ref_cost[REF_CONTEXTS][FWD_REFS - 1][2]; - // Cost for signaling ref_frame[1] (ALTREF_FRAME, ALTREF2_FRAME, or - // BWDREF_FRAME) in bidir-comp mode. - int comp_bwdref_cost[REF_CONTEXTS][BWD_REFS - 1][2]; - int inter_compound_mode_cost[INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES]; - int compound_type_cost[BLOCK_SIZES_ALL][COMPOUND_TYPES - 1]; - int wedge_idx_cost[BLOCK_SIZES_ALL][16]; - int interintra_cost[BLOCK_SIZE_GROUPS][2]; - int wedge_interintra_cost[BLOCK_SIZES_ALL][2]; - int interintra_mode_cost[BLOCK_SIZE_GROUPS][INTERINTRA_MODES]; - int motion_mode_cost[BLOCK_SIZES_ALL][MOTION_MODES]; - int motion_mode_cost1[BLOCK_SIZES_ALL][2]; - int intra_uv_mode_cost[CFL_ALLOWED_TYPES][INTRA_MODES][UV_INTRA_MODES]; - int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES]; - int filter_intra_cost[BLOCK_SIZES_ALL][2]; - int filter_intra_mode_cost[FILTER_INTRA_MODES]; - int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS]; - int partition_cost[PARTITION_CONTEXTS][EXT_PARTITION_TYPES]; - int palette_y_size_cost[PALATTE_BSIZE_CTXS][PALETTE_SIZES]; - int palette_uv_size_cost[PALATTE_BSIZE_CTXS][PALETTE_SIZES]; - int palette_y_color_cost[PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS] - [PALETTE_COLORS]; - int palette_uv_color_cost[PALETTE_SIZES][PALETTE_COLOR_INDEX_CONTEXTS] - [PALETTE_COLORS]; - int palette_y_mode_cost[PALATTE_BSIZE_CTXS][PALETTE_Y_MODE_CONTEXTS][2]; - int palette_uv_mode_cost[PALETTE_UV_MODE_CONTEXTS][2]; - // The rate associated with each alpha codeword - int cfl_cost[CFL_JOINT_SIGNS][CFL_PRED_PLANES][CFL_ALPHABET_SIZE]; - int tx_size_cost[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES]; - int txfm_partition_cost[TXFM_PARTITION_CONTEXTS][2]; - int inter_tx_type_costs[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES]; - int intra_tx_type_costs[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES] - [TX_TYPES]; - int angle_delta_cost[DIRECTIONAL_MODES][2 * MAX_ANGLE_DELTA + 1]; - int 
switchable_restore_cost[RESTORE_SWITCHABLE_TYPES]; - int wiener_restore_cost[2]; - int sgrproj_restore_cost[2]; - int intrabc_cost[2]; - - // Used to store sub partition's choices. - MV pred_mv[REF_FRAMES]; - - // Store the best motion vector during motion search - int_mv best_mv; - // Store the second best motion vector during full-pixel motion search - int_mv second_best_mv; - - // use default transform and skip transform type search for intra modes - int use_default_intra_tx_type; - // use default transform and skip transform type search for inter modes - int use_default_inter_tx_type; -#if CONFIG_DIST_8X8 - int using_dist_8x8; - aom_tune_metric tune_metric; -#endif // CONFIG_DIST_8X8 - int comp_idx_cost[COMP_INDEX_CONTEXTS][2]; - int comp_group_idx_cost[COMP_GROUP_IDX_CONTEXTS][2]; - // Bit flags for pruning tx type search, tx split, etc. - int tx_search_prune[EXT_TX_SET_TYPES]; - int must_find_valid_partition; - int tx_split_prune_flag; // Flag to skip tx split RD search. - int recalc_luma_mc_data; // Flag to indicate recalculation of MC data during - // interpolation filter search -}; - -static INLINE int is_rect_tx_allowed_bsize(BLOCK_SIZE bsize) { - static const char LUT[BLOCK_SIZES_ALL] = { - 0, // BLOCK_4X4 - 1, // BLOCK_4X8 - 1, // BLOCK_8X4 - 0, // BLOCK_8X8 - 1, // BLOCK_8X16 - 1, // BLOCK_16X8 - 0, // BLOCK_16X16 - 1, // BLOCK_16X32 - 1, // BLOCK_32X16 - 0, // BLOCK_32X32 - 1, // BLOCK_32X64 - 1, // BLOCK_64X32 - 0, // BLOCK_64X64 - 0, // BLOCK_64X128 - 0, // BLOCK_128X64 - 0, // BLOCK_128X128 - 1, // BLOCK_4X16 - 1, // BLOCK_16X4 - 1, // BLOCK_8X32 - 1, // BLOCK_32X8 - 1, // BLOCK_16X64 - 1, // BLOCK_64X16 - }; - - return LUT[bsize]; -} - -static INLINE int is_rect_tx_allowed(const MACROBLOCKD *xd, - const MB_MODE_INFO *mbmi) { - return is_rect_tx_allowed_bsize(mbmi->sb_type) && - !xd->lossless[mbmi->segment_id]; -} - -static INLINE int tx_size_to_depth(TX_SIZE tx_size, BLOCK_SIZE bsize) { - TX_SIZE ctx_size = max_txsize_rect_lookup[bsize]; - int 
depth = 0; - while (tx_size != ctx_size) { - depth++; - ctx_size = sub_tx_size_map[ctx_size]; - assert(depth <= MAX_TX_DEPTH); - } - return depth; -} - -static INLINE void set_blk_skip(MACROBLOCK *x, int plane, int blk_idx, - int skip) { - if (skip) - x->blk_skip[blk_idx] |= 1UL << plane; - else - x->blk_skip[blk_idx] &= ~(1UL << plane); -#ifndef NDEBUG - // Set chroma planes to uninitialized states when luma is set to check if - // it will be set later - if (plane == 0) { - x->blk_skip[blk_idx] |= 1UL << (1 + 4); - x->blk_skip[blk_idx] |= 1UL << (2 + 4); - } - - // Clear the initialization checking bit - x->blk_skip[blk_idx] &= ~(1UL << (plane + 4)); -#endif -} - -static INLINE int is_blk_skip(MACROBLOCK *x, int plane, int blk_idx) { -#ifndef NDEBUG - // Check if this is initialized - assert(!(x->blk_skip[blk_idx] & (1UL << (plane + 4)))); - - // The magic number is 0x77, this is to test if there is garbage data - assert((x->blk_skip[blk_idx] & 0x88) == 0); -#endif - return (x->blk_skip[blk_idx] >> plane) & 1; -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_BLOCK_H_ diff --git a/third_party/aom/av1/encoder/blockiness.c b/third_party/aom/av1/encoder/blockiness.c deleted file mode 100644 index f7cff9e53..000000000 --- a/third_party/aom/av1/encoder/blockiness.c +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include "config/av1_rtcd.h" -#include "config/aom_config.h" -#include "config/aom_dsp_rtcd.h" - -#include "av1/common/common.h" -#include "av1/common/filter.h" -#include "aom/aom_integer.h" -#include "aom_dsp/aom_filter.h" -#include "aom_ports/mem.h" -#include "aom_ports/system_state.h" - -static int horizontal_filter(const uint8_t *s) { - return (s[1] - s[-2]) * 2 + (s[-1] - s[0]) * 6; -} - -static int vertical_filter(const uint8_t *s, int p) { - return (s[p] - s[-2 * p]) * 2 + (s[-p] - s[0]) * 6; -} - -static int variance(int sum, int sum_squared, int size) { - return sum_squared / size - (sum / size) * (sum / size); -} -// Calculate a blockiness level for a vertical block edge. -// This function returns a new blockiness metric that's defined as - -// p0 p1 p2 p3 -// q0 q1 q2 q3 -// block edge -> -// r0 r1 r2 r3 -// s0 s1 s2 s3 - -// blockiness = p0*-2+q0*6+r0*-6+s0*2 + -// p1*-2+q1*6+r1*-6+s1*2 + -// p2*-2+q2*6+r2*-6+s2*2 + -// p3*-2+q3*6+r3*-6+s3*2 ; - -// reconstructed_blockiness = abs(blockiness from reconstructed buffer - -// blockiness from source buffer,0) -// -// I make the assumption that flat blocks are much more visible than high -// contrast blocks. 
As such, I scale the result of the blockiness calc -// by dividing the blockiness by the variance of the pixels on either side -// of the edge as follows: -// var_0 = (q0^2+q1^2+q2^2+q3^2) - ((q0 + q1 + q2 + q3) / 4 )^2 -// var_1 = (r0^2+r1^2+r2^2+r3^2) - ((r0 + r1 + r2 + r3) / 4 )^2 -// The returned blockiness is the scaled value -// Reconstructed blockiness / ( 1 + var_0 + var_1 ) ; -static int blockiness_vertical(const uint8_t *s, int sp, const uint8_t *r, - int rp, int size) { - int s_blockiness = 0; - int r_blockiness = 0; - int sum_0 = 0; - int sum_sq_0 = 0; - int sum_1 = 0; - int sum_sq_1 = 0; - int i; - int var_0; - int var_1; - for (i = 0; i < size; ++i, s += sp, r += rp) { - s_blockiness += horizontal_filter(s); - r_blockiness += horizontal_filter(r); - sum_0 += s[0]; - sum_sq_0 += s[0] * s[0]; - sum_1 += s[-1]; - sum_sq_1 += s[-1] * s[-1]; - } - var_0 = variance(sum_0, sum_sq_0, size); - var_1 = variance(sum_1, sum_sq_1, size); - r_blockiness = abs(r_blockiness); - s_blockiness = abs(s_blockiness); - - if (r_blockiness > s_blockiness) - return (r_blockiness - s_blockiness) / (1 + var_0 + var_1); - else - return 0; -} - -// Calculate a blockiness level for a horizontal block edge -// same as above. 
-static int blockiness_horizontal(const uint8_t *s, int sp, const uint8_t *r, - int rp, int size) { - int s_blockiness = 0; - int r_blockiness = 0; - int sum_0 = 0; - int sum_sq_0 = 0; - int sum_1 = 0; - int sum_sq_1 = 0; - int i; - int var_0; - int var_1; - for (i = 0; i < size; ++i, ++s, ++r) { - s_blockiness += vertical_filter(s, sp); - r_blockiness += vertical_filter(r, rp); - sum_0 += s[0]; - sum_sq_0 += s[0] * s[0]; - sum_1 += s[-sp]; - sum_sq_1 += s[-sp] * s[-sp]; - } - var_0 = variance(sum_0, sum_sq_0, size); - var_1 = variance(sum_1, sum_sq_1, size); - r_blockiness = abs(r_blockiness); - s_blockiness = abs(s_blockiness); - - if (r_blockiness > s_blockiness) - return (r_blockiness - s_blockiness) / (1 + var_0 + var_1); - else - return 0; -} - -// This function returns the blockiness for the entire frame currently by -// looking at all borders in steps of 4. -double av1_get_blockiness(const unsigned char *img1, int img1_pitch, - const unsigned char *img2, int img2_pitch, int width, - int height) { - double blockiness = 0; - int i, j; - aom_clear_system_state(); - for (i = 0; i < height; - i += 4, img1 += img1_pitch * 4, img2 += img2_pitch * 4) { - for (j = 0; j < width; j += 4) { - if (i > 0 && i < height && j > 0 && j < width) { - blockiness += - blockiness_vertical(img1 + j, img1_pitch, img2 + j, img2_pitch, 4); - blockiness += blockiness_horizontal(img1 + j, img1_pitch, img2 + j, - img2_pitch, 4); - } - } - } - blockiness /= width * height / 16; - return blockiness; -} diff --git a/third_party/aom/av1/encoder/context_tree.c b/third_party/aom/av1/encoder/context_tree.c deleted file mode 100644 index 57f59f304..000000000 --- a/third_party/aom/av1/encoder/context_tree.c +++ /dev/null @@ -1,215 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include "av1/encoder/context_tree.h" -#include "av1/encoder/encoder.h" - -static const BLOCK_SIZE square[MAX_SB_SIZE_LOG2 - 1] = { - BLOCK_4X4, BLOCK_8X8, BLOCK_16X16, BLOCK_32X32, BLOCK_64X64, BLOCK_128X128, -}; - -static void alloc_mode_context(AV1_COMMON *cm, int num_pix, - PICK_MODE_CONTEXT *ctx) { - const int num_planes = av1_num_planes(cm); - int i; - const int num_blk = num_pix / 16; - ctx->num_4x4_blk = num_blk; - - CHECK_MEM_ERROR(cm, ctx->blk_skip, aom_calloc(num_blk, sizeof(uint8_t))); - for (i = 0; i < num_planes; ++i) { - CHECK_MEM_ERROR(cm, ctx->coeff[i], - aom_memalign(32, num_pix * sizeof(*ctx->coeff[i]))); - CHECK_MEM_ERROR(cm, ctx->qcoeff[i], - aom_memalign(32, num_pix * sizeof(*ctx->qcoeff[i]))); - CHECK_MEM_ERROR(cm, ctx->dqcoeff[i], - aom_memalign(32, num_pix * sizeof(*ctx->dqcoeff[i]))); - CHECK_MEM_ERROR(cm, ctx->eobs[i], - aom_memalign(32, num_blk * sizeof(*ctx->eobs[i]))); - CHECK_MEM_ERROR( - cm, ctx->txb_entropy_ctx[i], - aom_memalign(32, num_blk * sizeof(*ctx->txb_entropy_ctx[i]))); - } - - if (num_pix <= MAX_PALETTE_SQUARE) { - for (i = 0; i < 2; ++i) { - CHECK_MEM_ERROR( - cm, ctx->color_index_map[i], - aom_memalign(32, num_pix * sizeof(*ctx->color_index_map[i]))); - } - } -} - -static void free_mode_context(PICK_MODE_CONTEXT *ctx, const int num_planes) { - int i; - aom_free(ctx->blk_skip); - ctx->blk_skip = 0; - for (i = 0; i < num_planes; ++i) { - aom_free(ctx->coeff[i]); - ctx->coeff[i] = 0; - aom_free(ctx->qcoeff[i]); - ctx->qcoeff[i] = 0; - aom_free(ctx->dqcoeff[i]); - ctx->dqcoeff[i] = 0; - aom_free(ctx->eobs[i]); - ctx->eobs[i] = 0; - aom_free(ctx->txb_entropy_ctx[i]); - ctx->txb_entropy_ctx[i] = 
0; - } - - for (i = 0; i < 2; ++i) { - aom_free(ctx->color_index_map[i]); - ctx->color_index_map[i] = 0; - } -} - -static void alloc_tree_contexts(AV1_COMMON *cm, PC_TREE *tree, int num_pix, - int is_leaf) { - alloc_mode_context(cm, num_pix, &tree->none); - - if (is_leaf) return; - - alloc_mode_context(cm, num_pix / 2, &tree->horizontal[0]); - alloc_mode_context(cm, num_pix / 2, &tree->vertical[0]); - - alloc_mode_context(cm, num_pix / 2, &tree->horizontal[1]); - alloc_mode_context(cm, num_pix / 2, &tree->vertical[1]); - - alloc_mode_context(cm, num_pix / 4, &tree->horizontala[0]); - alloc_mode_context(cm, num_pix / 4, &tree->horizontala[1]); - alloc_mode_context(cm, num_pix / 2, &tree->horizontala[2]); - - alloc_mode_context(cm, num_pix / 2, &tree->horizontalb[0]); - alloc_mode_context(cm, num_pix / 4, &tree->horizontalb[1]); - alloc_mode_context(cm, num_pix / 4, &tree->horizontalb[2]); - - alloc_mode_context(cm, num_pix / 4, &tree->verticala[0]); - alloc_mode_context(cm, num_pix / 4, &tree->verticala[1]); - alloc_mode_context(cm, num_pix / 2, &tree->verticala[2]); - - alloc_mode_context(cm, num_pix / 2, &tree->verticalb[0]); - alloc_mode_context(cm, num_pix / 4, &tree->verticalb[1]); - alloc_mode_context(cm, num_pix / 4, &tree->verticalb[2]); - - for (int i = 0; i < 4; ++i) { - alloc_mode_context(cm, num_pix / 4, &tree->horizontal4[i]); - alloc_mode_context(cm, num_pix / 4, &tree->vertical4[i]); - } -} - -static void free_tree_contexts(PC_TREE *tree, const int num_planes) { - int i; - for (i = 0; i < 3; i++) { - free_mode_context(&tree->horizontala[i], num_planes); - free_mode_context(&tree->horizontalb[i], num_planes); - free_mode_context(&tree->verticala[i], num_planes); - free_mode_context(&tree->verticalb[i], num_planes); - } - for (i = 0; i < 4; ++i) { - free_mode_context(&tree->horizontal4[i], num_planes); - free_mode_context(&tree->vertical4[i], num_planes); - } - free_mode_context(&tree->none, num_planes); - free_mode_context(&tree->horizontal[0], 
num_planes); - free_mode_context(&tree->horizontal[1], num_planes); - free_mode_context(&tree->vertical[0], num_planes); - free_mode_context(&tree->vertical[1], num_planes); -} - -// This function sets up a tree of contexts such that at each square -// partition level. There are contexts for none, horizontal, vertical, and -// split. Along with a block_size value and a selected block_size which -// represents the state of our search. -void av1_setup_pc_tree(AV1_COMMON *cm, ThreadData *td) { - int i, j; - const int tree_nodes_inc = 1024; - const int leaf_factor = 4; - const int leaf_nodes = 256 * leaf_factor; - const int tree_nodes = tree_nodes_inc + 256 + 64 + 16 + 4 + 1; - int pc_tree_index = 0; - PC_TREE *this_pc; - int square_index = 1; - int nodes; - - aom_free(td->pc_tree); - CHECK_MEM_ERROR(cm, td->pc_tree, - aom_calloc(tree_nodes, sizeof(*td->pc_tree))); - this_pc = &td->pc_tree[0]; - - // Sets up all the leaf nodes in the tree. - for (pc_tree_index = 0; pc_tree_index < leaf_nodes; ++pc_tree_index) { - PC_TREE *const tree = &td->pc_tree[pc_tree_index]; - tree->block_size = square[0]; - alloc_tree_contexts(cm, tree, 16, 1); - } - - // Each node has 4 leaf nodes, fill each block_size level of the tree - // from leafs to the root. 
- for (nodes = leaf_nodes >> 2; nodes > 0; nodes >>= 2) { - for (i = 0; i < nodes; ++i) { - PC_TREE *const tree = &td->pc_tree[pc_tree_index]; - alloc_tree_contexts(cm, tree, 16 << (2 * square_index), 0); - tree->block_size = square[square_index]; - for (j = 0; j < 4; j++) tree->split[j] = this_pc++; - ++pc_tree_index; - } - ++square_index; - } - - // Set up the root node for the largest superblock size - i = MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2; - td->pc_root[i] = &td->pc_tree[tree_nodes - 1]; - td->pc_root[i]->none.best_mode_index = 2; - // Set up the root nodes for the rest of the possible superblock sizes - while (--i >= 0) { - td->pc_root[i] = td->pc_root[i + 1]->split[0]; - td->pc_root[i]->none.best_mode_index = 2; - } -} - -void av1_free_pc_tree(ThreadData *td, const int num_planes) { - if (td->pc_tree != NULL) { - const int tree_nodes_inc = 1024; - const int tree_nodes = tree_nodes_inc + 256 + 64 + 16 + 4 + 1; - for (int i = 0; i < tree_nodes; ++i) { - free_tree_contexts(&td->pc_tree[i], num_planes); - } - aom_free(td->pc_tree); - td->pc_tree = NULL; - } -} - -void av1_copy_tree_context(PICK_MODE_CONTEXT *dst_ctx, - PICK_MODE_CONTEXT *src_ctx) { - dst_ctx->mic = src_ctx->mic; - dst_ctx->mbmi_ext = src_ctx->mbmi_ext; - - dst_ctx->num_4x4_blk = src_ctx->num_4x4_blk; - dst_ctx->skip = src_ctx->skip; - dst_ctx->skippable = src_ctx->skippable; - dst_ctx->best_mode_index = src_ctx->best_mode_index; - - memcpy(dst_ctx->blk_skip, src_ctx->blk_skip, - sizeof(uint8_t) * src_ctx->num_4x4_blk); - - dst_ctx->hybrid_pred_diff = src_ctx->hybrid_pred_diff; - dst_ctx->comp_pred_diff = src_ctx->comp_pred_diff; - dst_ctx->single_pred_diff = src_ctx->single_pred_diff; - - dst_ctx->rate = src_ctx->rate; - dst_ctx->dist = src_ctx->dist; - dst_ctx->rdcost = src_ctx->rdcost; - dst_ctx->rd_mode_is_ready = src_ctx->rd_mode_is_ready; - - memcpy(dst_ctx->pred_mv, src_ctx->pred_mv, sizeof(MV) * REF_FRAMES); - dst_ctx->pred_interp_filter = src_ctx->pred_interp_filter; - - 
dst_ctx->partition = src_ctx->partition; -} diff --git a/third_party/aom/av1/encoder/context_tree.h b/third_party/aom/av1/encoder/context_tree.h deleted file mode 100644 index 4efc34985..000000000 --- a/third_party/aom/av1/encoder/context_tree.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_ENCODER_CONTEXT_TREE_H_ -#define AOM_AV1_ENCODER_CONTEXT_TREE_H_ - -#include "av1/common/blockd.h" -#include "av1/encoder/block.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct AV1_COMP; -struct AV1Common; -struct ThreadData; - -typedef enum { - // Search all the partition types in this plane. - SEARCH_FULL_PLANE = 0, - // Only search none_partition coding block. - NONE_PARTITION_PLANE = 1, - // Search all the partition types in this plane except split. - SEARCH_SAME_PLANE = 2, - // Skip search partition on this plane. Go split directly. - SPLIT_PLANE = 3, -} CB_TREE_SEARCH; - -// Structure to hold snapshot of coding context during the mode picking process -typedef struct { - MB_MODE_INFO mic; - MB_MODE_INFO_EXT mbmi_ext; - uint8_t *color_index_map[2]; - uint8_t *blk_skip; - - tran_low_t *coeff[MAX_MB_PLANE]; - tran_low_t *qcoeff[MAX_MB_PLANE]; - tran_low_t *dqcoeff[MAX_MB_PLANE]; - uint16_t *eobs[MAX_MB_PLANE]; - uint8_t *txb_entropy_ctx[MAX_MB_PLANE]; - - int num_4x4_blk; - int skip; - // For current partition, only if all Y, U, and V transform blocks' - // coefficients are quantized to 0, skippable is set to 1. 
- int skippable; - int best_mode_index; - int hybrid_pred_diff; - int comp_pred_diff; - int single_pred_diff; - // Skip certain ref frames during RD search of rectangular partitions. - int skip_ref_frame_mask; - - // TODO(jingning) Use RD_COST struct here instead. This involves a boarder - // scope of refactoring. - int rate; - int64_t dist; - int64_t rdcost; - int rd_mode_is_ready; // Flag to indicate whether rd pick mode decision has - // been made. - - // motion vector cache for adaptive motion search control in partition - // search loop - MV pred_mv[REF_FRAMES]; - InterpFilter pred_interp_filter; - PARTITION_TYPE partition; -} PICK_MODE_CONTEXT; - -typedef struct { - int valid; - int split; - int skip; - int64_t rdcost; - int sub_block_split[4]; - int sub_block_skip[4]; - int64_t sub_block_rdcost[4]; -} PC_TREE_STATS; - -typedef struct PC_TREE { - int index; - PARTITION_TYPE partitioning; - BLOCK_SIZE block_size; - PICK_MODE_CONTEXT none; - PICK_MODE_CONTEXT horizontal[2]; - PICK_MODE_CONTEXT vertical[2]; - PICK_MODE_CONTEXT horizontala[3]; - PICK_MODE_CONTEXT horizontalb[3]; - PICK_MODE_CONTEXT verticala[3]; - PICK_MODE_CONTEXT verticalb[3]; - PICK_MODE_CONTEXT horizontal4[4]; - PICK_MODE_CONTEXT vertical4[4]; - CB_TREE_SEARCH cb_search_range; - struct PC_TREE *split[4]; - PC_TREE_STATS pc_tree_stats; -} PC_TREE; - -void av1_setup_pc_tree(struct AV1Common *cm, struct ThreadData *td); -void av1_free_pc_tree(struct ThreadData *td, const int num_planes); -void av1_copy_tree_context(PICK_MODE_CONTEXT *dst_ctx, - PICK_MODE_CONTEXT *src_ctx); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_CONTEXT_TREE_H_ diff --git a/third_party/aom/av1/encoder/corner_detect.c b/third_party/aom/av1/encoder/corner_detect.c deleted file mode 100644 index e4c59dd9c..000000000 --- a/third_party/aom/av1/encoder/corner_detect.c +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. 
All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include -#include -#include -#include -#include - -#include "third_party/fastfeat/fast.h" - -#include "av1/encoder/corner_detect.h" - -// Fast_9 wrapper -#define FAST_BARRIER 18 -int fast_corner_detect(unsigned char *buf, int width, int height, int stride, - int *points, int max_points) { - int num_points; - xy *const frm_corners_xy = fast9_detect_nonmax(buf, width, height, stride, - FAST_BARRIER, &num_points); - num_points = (num_points <= max_points ? num_points : max_points); - if (num_points > 0 && frm_corners_xy) { - memcpy(points, frm_corners_xy, sizeof(*frm_corners_xy) * num_points); - free(frm_corners_xy); - return num_points; - } - free(frm_corners_xy); - return 0; -} diff --git a/third_party/aom/av1/encoder/corner_detect.h b/third_party/aom/av1/encoder/corner_detect.h deleted file mode 100644 index cab59a774..000000000 --- a/third_party/aom/av1/encoder/corner_detect.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_ENCODER_CORNER_DETECT_H_ -#define AOM_AV1_ENCODER_CORNER_DETECT_H_ - -#include -#include -#include - -int fast_corner_detect(unsigned char *buf, int width, int height, int stride, - int *points, int max_points); - -#endif // AOM_AV1_ENCODER_CORNER_DETECT_H_ diff --git a/third_party/aom/av1/encoder/corner_match.c b/third_party/aom/av1/encoder/corner_match.c deleted file mode 100644 index 29e934deb..000000000 --- a/third_party/aom/av1/encoder/corner_match.c +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include -#include -#include - -#include "config/av1_rtcd.h" - -#include "av1/encoder/corner_match.h" - -#define SEARCH_SZ 9 -#define SEARCH_SZ_BY2 ((SEARCH_SZ - 1) / 2) - -#define THRESHOLD_NCC 0.75 - -/* Compute var(im) * MATCH_SZ_SQ over a MATCH_SZ by MATCH_SZ window of im, - centered at (x, y). 
-*/ -static double compute_variance(unsigned char *im, int stride, int x, int y) { - int sum = 0; - int sumsq = 0; - int var; - int i, j; - for (i = 0; i < MATCH_SZ; ++i) - for (j = 0; j < MATCH_SZ; ++j) { - sum += im[(i + y - MATCH_SZ_BY2) * stride + (j + x - MATCH_SZ_BY2)]; - sumsq += im[(i + y - MATCH_SZ_BY2) * stride + (j + x - MATCH_SZ_BY2)] * - im[(i + y - MATCH_SZ_BY2) * stride + (j + x - MATCH_SZ_BY2)]; - } - var = sumsq * MATCH_SZ_SQ - sum * sum; - return (double)var; -} - -/* Compute corr(im1, im2) * MATCH_SZ * stddev(im1), where the - correlation/standard deviation are taken over MATCH_SZ by MATCH_SZ windows - of each image, centered at (x1, y1) and (x2, y2) respectively. -*/ -double compute_cross_correlation_c(unsigned char *im1, int stride1, int x1, - int y1, unsigned char *im2, int stride2, - int x2, int y2) { - int v1, v2; - int sum1 = 0; - int sum2 = 0; - int sumsq2 = 0; - int cross = 0; - int var2, cov; - int i, j; - for (i = 0; i < MATCH_SZ; ++i) - for (j = 0; j < MATCH_SZ; ++j) { - v1 = im1[(i + y1 - MATCH_SZ_BY2) * stride1 + (j + x1 - MATCH_SZ_BY2)]; - v2 = im2[(i + y2 - MATCH_SZ_BY2) * stride2 + (j + x2 - MATCH_SZ_BY2)]; - sum1 += v1; - sum2 += v2; - sumsq2 += v2 * v2; - cross += v1 * v2; - } - var2 = sumsq2 * MATCH_SZ_SQ - sum2 * sum2; - cov = cross * MATCH_SZ_SQ - sum1 * sum2; - return cov / sqrt((double)var2); -} - -static int is_eligible_point(int pointx, int pointy, int width, int height) { - return (pointx >= MATCH_SZ_BY2 && pointy >= MATCH_SZ_BY2 && - pointx + MATCH_SZ_BY2 < width && pointy + MATCH_SZ_BY2 < height); -} - -static int is_eligible_distance(int point1x, int point1y, int point2x, - int point2y, int width, int height) { - const int thresh = (width < height ? 
height : width) >> 4; - return ((point1x - point2x) * (point1x - point2x) + - (point1y - point2y) * (point1y - point2y)) <= thresh * thresh; -} - -static void improve_correspondence(unsigned char *frm, unsigned char *ref, - int width, int height, int frm_stride, - int ref_stride, - Correspondence *correspondences, - int num_correspondences) { - int i; - for (i = 0; i < num_correspondences; ++i) { - int x, y, best_x = 0, best_y = 0; - double best_match_ncc = 0.0; - for (y = -SEARCH_SZ_BY2; y <= SEARCH_SZ_BY2; ++y) { - for (x = -SEARCH_SZ_BY2; x <= SEARCH_SZ_BY2; ++x) { - double match_ncc; - if (!is_eligible_point(correspondences[i].rx + x, - correspondences[i].ry + y, width, height)) - continue; - if (!is_eligible_distance(correspondences[i].x, correspondences[i].y, - correspondences[i].rx + x, - correspondences[i].ry + y, width, height)) - continue; - match_ncc = compute_cross_correlation( - frm, frm_stride, correspondences[i].x, correspondences[i].y, ref, - ref_stride, correspondences[i].rx + x, correspondences[i].ry + y); - if (match_ncc > best_match_ncc) { - best_match_ncc = match_ncc; - best_y = y; - best_x = x; - } - } - } - correspondences[i].rx += best_x; - correspondences[i].ry += best_y; - } - for (i = 0; i < num_correspondences; ++i) { - int x, y, best_x = 0, best_y = 0; - double best_match_ncc = 0.0; - for (y = -SEARCH_SZ_BY2; y <= SEARCH_SZ_BY2; ++y) - for (x = -SEARCH_SZ_BY2; x <= SEARCH_SZ_BY2; ++x) { - double match_ncc; - if (!is_eligible_point(correspondences[i].x + x, - correspondences[i].y + y, width, height)) - continue; - if (!is_eligible_distance( - correspondences[i].x + x, correspondences[i].y + y, - correspondences[i].rx, correspondences[i].ry, width, height)) - continue; - match_ncc = compute_cross_correlation( - ref, ref_stride, correspondences[i].rx, correspondences[i].ry, frm, - frm_stride, correspondences[i].x + x, correspondences[i].y + y); - if (match_ncc > best_match_ncc) { - best_match_ncc = match_ncc; - best_y = y; - best_x = x; - 
} - } - correspondences[i].x += best_x; - correspondences[i].y += best_y; - } -} - -int determine_correspondence(unsigned char *frm, int *frm_corners, - int num_frm_corners, unsigned char *ref, - int *ref_corners, int num_ref_corners, int width, - int height, int frm_stride, int ref_stride, - int *correspondence_pts) { - // TODO(sarahparker) Improve this to include 2-way match - int i, j; - Correspondence *correspondences = (Correspondence *)correspondence_pts; - int num_correspondences = 0; - for (i = 0; i < num_frm_corners; ++i) { - double best_match_ncc = 0.0; - double template_norm; - int best_match_j = -1; - if (!is_eligible_point(frm_corners[2 * i], frm_corners[2 * i + 1], width, - height)) - continue; - for (j = 0; j < num_ref_corners; ++j) { - double match_ncc; - if (!is_eligible_point(ref_corners[2 * j], ref_corners[2 * j + 1], width, - height)) - continue; - if (!is_eligible_distance(frm_corners[2 * i], frm_corners[2 * i + 1], - ref_corners[2 * j], ref_corners[2 * j + 1], - width, height)) - continue; - match_ncc = compute_cross_correlation( - frm, frm_stride, frm_corners[2 * i], frm_corners[2 * i + 1], ref, - ref_stride, ref_corners[2 * j], ref_corners[2 * j + 1]); - if (match_ncc > best_match_ncc) { - best_match_ncc = match_ncc; - best_match_j = j; - } - } - // Note: We want to test if the best correlation is >= THRESHOLD_NCC, - // but need to account for the normalization in compute_cross_correlation. 
- template_norm = compute_variance(frm, frm_stride, frm_corners[2 * i], - frm_corners[2 * i + 1]); - if (best_match_ncc > THRESHOLD_NCC * sqrt(template_norm)) { - correspondences[num_correspondences].x = frm_corners[2 * i]; - correspondences[num_correspondences].y = frm_corners[2 * i + 1]; - correspondences[num_correspondences].rx = ref_corners[2 * best_match_j]; - correspondences[num_correspondences].ry = - ref_corners[2 * best_match_j + 1]; - num_correspondences++; - } - } - improve_correspondence(frm, ref, width, height, frm_stride, ref_stride, - correspondences, num_correspondences); - return num_correspondences; -} diff --git a/third_party/aom/av1/encoder/corner_match.h b/third_party/aom/av1/encoder/corner_match.h deleted file mode 100644 index 535d2faed..000000000 --- a/third_party/aom/av1/encoder/corner_match.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ -#ifndef AOM_AV1_ENCODER_CORNER_MATCH_H_ -#define AOM_AV1_ENCODER_CORNER_MATCH_H_ - -#include -#include -#include - -#define MATCH_SZ 13 -#define MATCH_SZ_BY2 ((MATCH_SZ - 1) / 2) -#define MATCH_SZ_SQ (MATCH_SZ * MATCH_SZ) - -typedef struct { - int x, y; - int rx, ry; -} Correspondence; - -int determine_correspondence(unsigned char *frm, int *frm_corners, - int num_frm_corners, unsigned char *ref, - int *ref_corners, int num_ref_corners, int width, - int height, int frm_stride, int ref_stride, - int *correspondence_pts); - -#endif // AOM_AV1_ENCODER_CORNER_MATCH_H_ diff --git a/third_party/aom/av1/encoder/cost.c b/third_party/aom/av1/encoder/cost.c deleted file mode 100644 index 323e2aed5..000000000 --- a/third_party/aom/av1/encoder/cost.c +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ -#include - -#include "av1/encoder/cost.h" -#include "av1/common/entropy.h" - -// round(-log2(i/256.) * (1 << AV1_PROB_COST_SHIFT)); i = 128~255. 
-const uint16_t av1_prob_cost[128] = { - 512, 506, 501, 495, 489, 484, 478, 473, 467, 462, 456, 451, 446, 441, 435, - 430, 425, 420, 415, 410, 405, 400, 395, 390, 385, 380, 375, 371, 366, 361, - 356, 352, 347, 343, 338, 333, 329, 324, 320, 316, 311, 307, 302, 298, 294, - 289, 285, 281, 277, 273, 268, 264, 260, 256, 252, 248, 244, 240, 236, 232, - 228, 224, 220, 216, 212, 209, 205, 201, 197, 194, 190, 186, 182, 179, 175, - 171, 168, 164, 161, 157, 153, 150, 146, 143, 139, 136, 132, 129, 125, 122, - 119, 115, 112, 109, 105, 102, 99, 95, 92, 89, 86, 82, 79, 76, 73, - 70, 66, 63, 60, 57, 54, 51, 48, 45, 42, 38, 35, 32, 29, 26, - 23, 20, 18, 15, 12, 9, 6, 3, -}; - -void av1_cost_tokens_from_cdf(int *costs, const aom_cdf_prob *cdf, - const int *inv_map) { - int i; - aom_cdf_prob prev_cdf = 0; - for (i = 0;; ++i) { - aom_cdf_prob p15 = AOM_ICDF(cdf[i]) - prev_cdf; - p15 = (p15 < EC_MIN_PROB) ? EC_MIN_PROB : p15; - prev_cdf = AOM_ICDF(cdf[i]); - - if (inv_map) - costs[inv_map[i]] = av1_cost_symbol(p15); - else - costs[i] = av1_cost_symbol(p15); - - // Stop once we reach the end of the CDF - if (cdf[i] == AOM_ICDF(CDF_PROB_TOP)) break; - } -} diff --git a/third_party/aom/av1/encoder/cost.h b/third_party/aom/av1/encoder/cost.h deleted file mode 100644 index af5b09837..000000000 --- a/third_party/aom/av1/encoder/cost.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_ENCODER_COST_H_ -#define AOM_AV1_ENCODER_COST_H_ - -#include "aom_dsp/prob.h" -#include "aom/aom_integer.h" - -#ifdef __cplusplus -extern "C" { -#endif - -extern const uint16_t av1_prob_cost[128]; - -// The factor to scale from cost in bits to cost in av1_prob_cost units. -#define AV1_PROB_COST_SHIFT 9 - -// Cost of coding an n bit literal, using 128 (i.e. 50%) probability -// for each bit. -#define av1_cost_literal(n) ((n) * (1 << AV1_PROB_COST_SHIFT)) - -// Calculate the cost of a symbol with probability p15 / 2^15 -static INLINE int av1_cost_symbol(aom_cdf_prob p15) { - assert(0 < p15 && p15 < CDF_PROB_TOP); - const int shift = CDF_PROB_BITS - 1 - get_msb(p15); - const int prob = get_prob(p15 << shift, CDF_PROB_TOP); - assert(prob >= 128); - return av1_prob_cost[prob - 128] + av1_cost_literal(shift); -} - -void av1_cost_tokens_from_cdf(int *costs, const aom_cdf_prob *cdf, - const int *inv_map); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_COST_H_ diff --git a/third_party/aom/av1/encoder/dwt.c b/third_party/aom/av1/encoder/dwt.c deleted file mode 100644 index 04088b25f..000000000 --- a/third_party/aom/av1/encoder/dwt.c +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include -#include -#include - -#include "config/av1_rtcd.h" -#include "av1/encoder/dwt.h" - -// Note: block length must be even for this implementation -static void analysis_53_row(int length, tran_low_t *x, tran_low_t *lowpass, - tran_low_t *highpass) { - int n; - tran_low_t r, *a, *b; - - n = length >> 1; - b = highpass; - a = lowpass; - while (--n) { - *a++ = (r = *x++) * 2; - *b++ = *x - ((r + x[1] + 1) >> 1); - x++; - } - *a = (r = *x++) * 2; - *b = *x - r; - - n = length >> 1; - b = highpass; - a = lowpass; - r = *highpass; - while (n--) { - *a++ += (r + (*b) + 1) >> 1; - r = *b++; - } -} - -static void analysis_53_col(int length, tran_low_t *x, tran_low_t *lowpass, - tran_low_t *highpass) { - int n; - tran_low_t r, *a, *b; - - n = length >> 1; - b = highpass; - a = lowpass; - while (--n) { - *a++ = (r = *x++); - *b++ = (((*x) * 2) - (r + x[1]) + 2) >> 2; - x++; - } - *a = (r = *x++); - *b = (*x - r + 1) >> 1; - - n = length >> 1; - b = highpass; - a = lowpass; - r = *highpass; - while (n--) { - *a++ += (r + (*b) + 1) >> 1; - r = *b++; - } -} - -static void dyadic_analyze_53_uint8_input(int levels, int width, int height, - uint8_t *x, int pitch_x, - tran_low_t *c, int pitch_c, - int dwt_scale_bits, int hbd) { - int lv, i, j, nh, nw, hh = height, hw = width; - tran_low_t buffer[2 * DWT_MAX_LENGTH]; - - if (hbd) { - uint16_t *x16 = CONVERT_TO_SHORTPTR(x); - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - c[i * pitch_c + j] = x16[i * pitch_x + j] << dwt_scale_bits; - } - } - } else { - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) { - c[i * pitch_c + j] = x[i * pitch_x + j] << dwt_scale_bits; - } - } - } - - for (lv = 0; lv < levels; lv++) { - nh = hh; - hh = (hh + 1) >> 1; - nw = hw; - hw = (hw + 1) >> 1; - if ((nh < 2) || (nw < 2)) return; - for (i = 0; i < nh; i++) { - memcpy(buffer, &c[i * pitch_c], nw * sizeof(tran_low_t)); - analysis_53_row(nw, buffer, &c[i * pitch_c], &c[i * pitch_c] + hw); - } - for (j = 0; j < 
nw; j++) { - for (i = 0; i < nh; i++) buffer[i + nh] = c[i * pitch_c + j]; - analysis_53_col(nh, buffer + nh, buffer, buffer + hh); - for (i = 0; i < nh; i++) c[i * pitch_c + j] = buffer[i]; - } - } -} - -void av1_fdwt8x8_uint8_input_c(uint8_t *input, tran_low_t *output, int stride, - int hbd) { - dyadic_analyze_53_uint8_input(4, 8, 8, input, stride, output, 8, 2, hbd); -} - -int av1_haar_ac_sad(tran_low_t *output, int bw, int bh, int stride) { - int acsad = 0; - - for (int r = 0; r < bh; ++r) - for (int c = 0; c < bw; ++c) { - if (r >= bh / 2 || c >= bw / 2) acsad += abs(output[r * stride + c]); - } - return acsad; -} - -uint64_t av1_dct_ac_sad(tran_low_t *output, int bw, int bh, int stride) { - uint64_t acsad = 0; - - for (int r = 0; r < bh; ++r) - for (int c = 0; c < bw; ++c) { - if (r > 0 || c > 0) acsad += abs(output[r * stride + c]); - } - - return acsad; -} - -uint32_t av1_variance(uint8_t *input, int bw, int bh, int stride) { - int sum = 0; - uint32_t sse = 0; - - for (int r = 0; r < bh; ++r) - for (int c = 0; c < bw; ++c) { - sum += input[r * stride + c]; - sse += input[r * stride + c] * input[r * stride + c]; - } - return sse - (uint32_t)(((int64_t)sum * sum) / (bw * bh)); -} - -int av1_haar_ac_sad_8x8_uint8_input(uint8_t *input, int stride, int hbd) { - tran_low_t output[64]; - - av1_fdwt8x8_uint8_input_c(input, output, stride, hbd); - return av1_haar_ac_sad(output, 8, 8, 8); -} diff --git a/third_party/aom/av1/encoder/dwt.h b/third_party/aom/av1/encoder/dwt.h deleted file mode 100644 index 37306c6a5..000000000 --- a/third_party/aom/av1/encoder/dwt.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_ENCODER_DWT_H_ -#define AOM_AV1_ENCODER_DWT_H_ - -#include "av1/common/common.h" -#include "av1/common/enums.h" - -#define DWT_MAX_LENGTH 64 - -void av1_fdwt8x8(tran_low_t *input, tran_low_t *output, int stride); -void av1_fdwt8x8_uint8_input_c(uint8_t *input, tran_low_t *output, int stride, - int hbd); -int av1_haar_ac_sad_8x8_uint8_input(uint8_t *input, int stride, int hbd); - -#endif // AOM_AV1_ENCODER_DWT_H_ diff --git a/third_party/aom/av1/encoder/encodeframe.c b/third_party/aom/av1/encoder/encodeframe.c deleted file mode 100644 index cb226c59e..000000000 --- a/third_party/aom/av1/encoder/encodeframe.c +++ /dev/null @@ -1,5739 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include -#include -#include - -#include "config/aom_config.h" -#include "config/aom_dsp_rtcd.h" -#include "config/av1_rtcd.h" - -#include "aom_dsp/aom_dsp_common.h" -#include "aom_dsp/binary_codes_writer.h" -#include "aom_ports/mem.h" -#include "aom_ports/aom_timer.h" -#include "aom_ports/system_state.h" - -#if CONFIG_MISMATCH_DEBUG -#include "aom_util/debug_util.h" -#endif // CONFIG_MISMATCH_DEBUG - -#include "av1/common/cfl.h" -#include "av1/common/common.h" -#include "av1/common/entropy.h" -#include "av1/common/entropymode.h" -#include "av1/common/idct.h" -#include "av1/common/mv.h" -#include "av1/common/mvref_common.h" -#include "av1/common/pred_common.h" -#include "av1/common/quant_common.h" -#include "av1/common/reconintra.h" -#include "av1/common/reconinter.h" -#include "av1/common/seg_common.h" -#include "av1/common/tile_common.h" -#include "av1/common/warped_motion.h" - -#include "av1/encoder/aq_complexity.h" -#include "av1/encoder/aq_cyclicrefresh.h" -#include "av1/encoder/aq_variance.h" -#include "av1/encoder/global_motion.h" -#include "av1/encoder/encodeframe.h" -#include "av1/encoder/encodemb.h" -#include "av1/encoder/encodemv.h" -#include "av1/encoder/encodetxb.h" -#include "av1/encoder/ethread.h" -#include "av1/encoder/extend.h" -#include "av1/encoder/ml.h" -#include "av1/encoder/partition_model_weights.h" -#include "av1/encoder/rd.h" -#include "av1/encoder/rdopt.h" -#include "av1/encoder/reconinter_enc.h" -#include "av1/encoder/segmentation.h" -#include "av1/encoder/tokenize.h" - -static void encode_superblock(const AV1_COMP *const cpi, TileDataEnc *tile_data, - ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run, - int mi_row, int mi_col, BLOCK_SIZE bsize, - int *rate); - -// This is used as a reference when computing the source variance for the -// purposes of activity masking. -// Eventually this should be replaced by custom no-reference routines, -// which will be faster. 
-static const uint8_t AV1_VAR_OFFS[MAX_SB_SIZE] = { - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128 -}; - -static const uint16_t AV1_HIGH_VAR_OFFS_8[MAX_SB_SIZE] = { - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, - 128, 128, 128, 128, 128, 128, 128, 128 -}; - -static const uint16_t AV1_HIGH_VAR_OFFS_10[MAX_SB_SIZE] = { - 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, - 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, - 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, - 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, - 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, - 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, - 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 
128 * 4, 128 * 4, - 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, - 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, - 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, - 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, - 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, - 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, - 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, - 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, - 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4 -}; - -static const uint16_t AV1_HIGH_VAR_OFFS_12[MAX_SB_SIZE] = { - 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, - 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, - 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, - 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, - 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, - 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, - 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, - 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, - 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, - 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, - 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, - 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, - 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, - 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, - 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, - 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, - 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, - 128 * 16, 128 * 16, 128 
* 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, - 128 * 16, 128 * 16 -}; - -#if CONFIG_FP_MB_STATS -static const uint8_t num_16x16_blocks_wide_lookup[BLOCK_SIZES_ALL] = { - 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 1, 1, 1, 2, 2, 4 -}; -static const uint8_t num_16x16_blocks_high_lookup[BLOCK_SIZES_ALL] = { - 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 1, 1, 2, 1, 4, 2 -}; -#endif // CONFIG_FP_MB_STATS - -unsigned int av1_get_sby_perpixel_variance(const AV1_COMP *cpi, - const struct buf_2d *ref, - BLOCK_SIZE bs) { - unsigned int sse; - const unsigned int var = - cpi->fn_ptr[bs].vf(ref->buf, ref->stride, AV1_VAR_OFFS, 0, &sse); - return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]); -} - -unsigned int av1_high_get_sby_perpixel_variance(const AV1_COMP *cpi, - const struct buf_2d *ref, - BLOCK_SIZE bs, int bd) { - unsigned int var, sse; - switch (bd) { - case 10: - var = - cpi->fn_ptr[bs].vf(ref->buf, ref->stride, - CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_10), 0, &sse); - break; - case 12: - var = - cpi->fn_ptr[bs].vf(ref->buf, ref->stride, - CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_12), 0, &sse); - break; - case 8: - default: - var = - cpi->fn_ptr[bs].vf(ref->buf, ref->stride, - CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_8), 0, &sse); - break; - } - return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]); -} - -static unsigned int get_sby_perpixel_diff_variance(const AV1_COMP *const cpi, - const struct buf_2d *ref, - int mi_row, int mi_col, - BLOCK_SIZE bs) { - unsigned int sse, var; - uint8_t *last_y; - const YV12_BUFFER_CONFIG *last = get_ref_frame_buffer(cpi, LAST_FRAME); - - assert(last != NULL); - last_y = - &last->y_buffer[mi_row * MI_SIZE * last->y_stride + mi_col * MI_SIZE]; - var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, last_y, last->y_stride, &sse); - return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]); -} - -static BLOCK_SIZE get_rd_var_based_fixed_partition(AV1_COMP *cpi, MACROBLOCK *x, - int mi_row, int mi_col) { - unsigned int var = 
get_sby_perpixel_diff_variance( - cpi, &x->plane[0].src, mi_row, mi_col, BLOCK_64X64); - if (var < 8) - return BLOCK_64X64; - else if (var < 128) - return BLOCK_32X32; - else if (var < 2048) - return BLOCK_16X16; - else - return BLOCK_8X8; -} - -// Lighter version of set_offsets that only sets the mode info -// pointers. -static void set_mode_info_offsets(const AV1_COMP *const cpi, - MACROBLOCK *const x, MACROBLOCKD *const xd, - int mi_row, int mi_col) { - const AV1_COMMON *const cm = &cpi->common; - const int idx_str = xd->mi_stride * mi_row + mi_col; - xd->mi = cm->mi_grid_visible + idx_str; - xd->mi[0] = cm->mi + idx_str; - x->mbmi_ext = cpi->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col); -} - -static void set_offsets_without_segment_id(const AV1_COMP *const cpi, - const TileInfo *const tile, - MACROBLOCK *const x, int mi_row, - int mi_col, BLOCK_SIZE bsize) { - const AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - MACROBLOCKD *const xd = &x->e_mbd; - const int mi_width = mi_size_wide[bsize]; - const int mi_height = mi_size_high[bsize]; - - set_mode_info_offsets(cpi, x, xd, mi_row, mi_col); - - set_skip_context(xd, mi_row, mi_col, num_planes); - xd->above_txfm_context = cm->above_txfm_context[tile->tile_row] + mi_col; - xd->left_txfm_context = - xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); - - // Set up destination pointers. - av1_setup_dst_planes(xd->plane, bsize, get_frame_new_buffer(cm), mi_row, - mi_col, 0, num_planes); - - // Set up limit values for MV components. - // Mv beyond the range do not produce new/different prediction block. 
- x->mv_limits.row_min = - -(((mi_row + mi_height) * MI_SIZE) + AOM_INTERP_EXTEND); - x->mv_limits.col_min = -(((mi_col + mi_width) * MI_SIZE) + AOM_INTERP_EXTEND); - x->mv_limits.row_max = (cm->mi_rows - mi_row) * MI_SIZE + AOM_INTERP_EXTEND; - x->mv_limits.col_max = (cm->mi_cols - mi_col) * MI_SIZE + AOM_INTERP_EXTEND; - - set_plane_n4(xd, mi_width, mi_height, num_planes); - - // Set up distance of MB to edge of frame in 1/8th pel units. - assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1))); - set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, cm->mi_rows, - cm->mi_cols); - - // Set up source buffers. - av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes); - - // R/D setup. - x->rdmult = cpi->rd.RDMULT; - - // required by av1_append_sub8x8_mvs_for_idx() and av1_find_best_ref_mvs() - xd->tile = *tile; -} - -static void set_offsets(const AV1_COMP *const cpi, const TileInfo *const tile, - MACROBLOCK *const x, int mi_row, int mi_col, - BLOCK_SIZE bsize) { - const AV1_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *mbmi; - const struct segmentation *const seg = &cm->seg; - - set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize); - - mbmi = xd->mi[0]; - xd->cfl.mi_row = mi_row; - xd->cfl.mi_col = mi_col; - - mbmi->segment_id = 0; - - // Setup segment ID. - if (seg->enabled) { - if (seg->enabled && !cpi->vaq_refresh) { - const uint8_t *const map = - seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; - mbmi->segment_id = - map ? 
get_segment_id(cm, map, bsize, mi_row, mi_col) : 0; - } - av1_init_plane_quantizers(cpi, x, mbmi->segment_id); - } -} - -static void reset_intmv_filter_type(MB_MODE_INFO *mbmi) { - InterpFilter filters[2]; - - for (int dir = 0; dir < 2; ++dir) { - filters[dir] = av1_extract_interp_filter(mbmi->interp_filters, dir); - } - mbmi->interp_filters = av1_make_interp_filters(filters[0], filters[1]); -} - -static void update_filter_type_count(uint8_t allow_update_cdf, - FRAME_COUNTS *counts, - const MACROBLOCKD *xd, - const MB_MODE_INFO *mbmi) { - int dir; - for (dir = 0; dir < 2; ++dir) { - const int ctx = av1_get_pred_context_switchable_interp(xd, dir); - InterpFilter filter = av1_extract_interp_filter(mbmi->interp_filters, dir); - ++counts->switchable_interp[ctx][filter]; - if (allow_update_cdf) { - update_cdf(xd->tile_ctx->switchable_interp_cdf[ctx], filter, - SWITCHABLE_FILTERS); - } - } -} - -static void update_global_motion_used(PREDICTION_MODE mode, BLOCK_SIZE bsize, - const MB_MODE_INFO *mbmi, - RD_COUNTS *rdc) { - if (mode == GLOBALMV || mode == GLOBAL_GLOBALMV) { - const int num_4x4s = mi_size_wide[bsize] * mi_size_high[bsize]; - int ref; - for (ref = 0; ref < 1 + has_second_ref(mbmi); ++ref) { - rdc->global_motion_used[mbmi->ref_frame[ref]] += num_4x4s; - } - } -} - -static void reset_tx_size(MACROBLOCK *x, MB_MODE_INFO *mbmi, - const TX_MODE tx_mode) { - MACROBLOCKD *const xd = &x->e_mbd; - if (xd->lossless[mbmi->segment_id]) { - mbmi->tx_size = TX_4X4; - } else if (tx_mode != TX_MODE_SELECT) { - mbmi->tx_size = tx_size_from_tx_mode(mbmi->sb_type, tx_mode); - } else { - BLOCK_SIZE bsize = mbmi->sb_type; - TX_SIZE min_tx_size = depth_to_tx_size(MAX_TX_DEPTH, bsize); - mbmi->tx_size = (TX_SIZE)TXSIZEMAX(mbmi->tx_size, min_tx_size); - } - if (is_inter_block(mbmi)) { - memset(mbmi->inter_tx_size, mbmi->tx_size, sizeof(mbmi->inter_tx_size)); - } - memset(mbmi->txk_type, DCT_DCT, sizeof(mbmi->txk_type[0]) * TXK_TYPE_BUF_LEN); - av1_zero(x->blk_skip); - x->skip = 0; 
-} - -static void update_state(const AV1_COMP *const cpi, - const TileDataEnc *const tile_data, ThreadData *td, - const PICK_MODE_CONTEXT *const ctx, int mi_row, - int mi_col, BLOCK_SIZE bsize, RUN_TYPE dry_run) { - int i, x_idx, y; - const AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - RD_COUNTS *const rdc = &td->rd_counts; - MACROBLOCK *const x = &td->mb; - MACROBLOCKD *const xd = &x->e_mbd; - struct macroblock_plane *const p = x->plane; - struct macroblockd_plane *const pd = xd->plane; - const MB_MODE_INFO *const mi = &ctx->mic; - MB_MODE_INFO *const mi_addr = xd->mi[0]; - const struct segmentation *const seg = &cm->seg; - const int bw = mi_size_wide[mi->sb_type]; - const int bh = mi_size_high[mi->sb_type]; - const int mis = cm->mi_stride; - const int mi_width = mi_size_wide[bsize]; - const int mi_height = mi_size_high[bsize]; - - assert(mi->sb_type == bsize); - - *mi_addr = *mi; - *x->mbmi_ext = ctx->mbmi_ext; - - reset_intmv_filter_type(mi_addr); - - memcpy(x->blk_skip, ctx->blk_skip, sizeof(x->blk_skip[0]) * ctx->num_4x4_blk); - - x->skip = ctx->skip; - - // If segmentation in use - if (seg->enabled) { - // For in frame complexity AQ copy the segment id from the segment map. - if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { - const uint8_t *const map = - seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; - mi_addr->segment_id = - map ? get_segment_id(cm, map, bsize, mi_row, mi_col) : 0; - reset_tx_size(x, mi_addr, cm->tx_mode); - } - // Else for cyclic refresh mode update the segment map, set the segment id - // and then update the quantizer. 
- if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { - av1_cyclic_refresh_update_segment(cpi, mi_addr, mi_row, mi_col, bsize, - ctx->rate, ctx->dist, x->skip); - reset_tx_size(x, mi_addr, cm->tx_mode); - } - if (mi_addr->uv_mode == UV_CFL_PRED && !is_cfl_allowed(xd)) - mi_addr->uv_mode = UV_DC_PRED; - } - - for (i = 0; i < num_planes; ++i) { - p[i].coeff = ctx->coeff[i]; - p[i].qcoeff = ctx->qcoeff[i]; - pd[i].dqcoeff = ctx->dqcoeff[i]; - p[i].eobs = ctx->eobs[i]; - p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i]; - } - for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i]; - // Restore the coding context of the MB to that that was in place - // when the mode was picked for it - for (y = 0; y < mi_height; y++) - for (x_idx = 0; x_idx < mi_width; x_idx++) - if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx && - (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) { - xd->mi[x_idx + y * mis] = mi_addr; - } - - if (cpi->oxcf.aq_mode) av1_init_plane_quantizers(cpi, x, mi_addr->segment_id); - - if (dry_run) return; - -#if CONFIG_INTERNAL_STATS - { - unsigned int *const mode_chosen_counts = - (unsigned int *)cpi->mode_chosen_counts; // Cast const away. 
- if (frame_is_intra_only(cm)) { - static const int kf_mode_index[] = { - THR_DC /*DC_PRED*/, - THR_V_PRED /*V_PRED*/, - THR_H_PRED /*H_PRED*/, - THR_D45_PRED /*D45_PRED*/, - THR_D135_PRED /*D135_PRED*/, - THR_D113_PRED /*D113_PRED*/, - THR_D157_PRED /*D157_PRED*/, - THR_D203_PRED /*D203_PRED*/, - THR_D67_PRED /*D67_PRED*/, - THR_SMOOTH, /*SMOOTH_PRED*/ - THR_SMOOTH_V, /*SMOOTH_V_PRED*/ - THR_SMOOTH_H, /*SMOOTH_H_PRED*/ - THR_PAETH /*PAETH_PRED*/, - }; - ++mode_chosen_counts[kf_mode_index[mi_addr->mode]]; - } else { - // Note how often each mode chosen as best - ++mode_chosen_counts[ctx->best_mode_index]; - } - } -#endif - if (!frame_is_intra_only(cm)) { - if (is_inter_block(mi_addr)) { - // TODO(sarahparker): global motion stats need to be handled per-tile - // to be compatible with tile-based threading. - update_global_motion_used(mi_addr->mode, bsize, mi_addr, rdc); - } - - if (cm->interp_filter == SWITCHABLE && - mi_addr->motion_mode != WARPED_CAUSAL && - !is_nontrans_global_motion(xd, xd->mi[0])) { - update_filter_type_count(tile_data->allow_update_cdf, td->counts, xd, - mi_addr); - } - - rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff; - rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff; - rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff; - } - - const int x_mis = AOMMIN(bw, cm->mi_cols - mi_col); - const int y_mis = AOMMIN(bh, cm->mi_rows - mi_row); - av1_copy_frame_mvs(cm, mi, mi_row, mi_col, x_mis, y_mis); -} - -void av1_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src, - int mi_row, int mi_col, const int num_planes) { - // Set current frame pointer. - x->e_mbd.cur_buf = src; - - // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet - // the static analysis warnings. 
- for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); i++) { - const int is_uv = i > 0; - setup_pred_plane(&x->plane[i].src, x->e_mbd.mi[0]->sb_type, src->buffers[i], - src->crop_widths[is_uv], src->crop_heights[is_uv], - src->strides[is_uv], mi_row, mi_col, NULL, - x->e_mbd.plane[i].subsampling_x, - x->e_mbd.plane[i].subsampling_y); - } -} - -static int set_segment_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x, - int8_t segment_id) { - const AV1_COMMON *const cm = &cpi->common; - av1_init_plane_quantizers(cpi, x, segment_id); - aom_clear_system_state(); - int segment_qindex = av1_get_qindex(&cm->seg, segment_id, cm->base_qindex); - return av1_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q); -} - -static int set_deltaq_rdmult(const AV1_COMP *const cpi, MACROBLOCKD *const xd) { - const AV1_COMMON *const cm = &cpi->common; - - return av1_compute_rd_mult( - cpi, cm->base_qindex + xd->delta_qindex + cm->y_dc_delta_q); -} - -static void rd_pick_sb_modes(AV1_COMP *const cpi, TileDataEnc *tile_data, - MACROBLOCK *const x, int mi_row, int mi_col, - RD_STATS *rd_cost, PARTITION_TYPE partition, - BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, - int64_t best_rd) { - AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - TileInfo *const tile_info = &tile_data->tile_info; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *mbmi; - MB_MODE_INFO *ctx_mbmi = &ctx->mic; - struct macroblock_plane *const p = x->plane; - struct macroblockd_plane *const pd = xd->plane; - const AQ_MODE aq_mode = cpi->oxcf.aq_mode; - const DELTAQ_MODE deltaq_mode = cpi->oxcf.deltaq_mode; - int i, orig_rdmult; - - if (best_rd < 0) { - ctx->rdcost = INT64_MAX; - ctx->skip = 0; - av1_invalid_rd_stats(rd_cost); - return; - } - - aom_clear_system_state(); - - set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); - - mbmi = xd->mi[0]; - - if (ctx->rd_mode_is_ready) { - assert(ctx_mbmi->sb_type == bsize); - assert(ctx_mbmi->partition == partition); - *mbmi = 
*ctx_mbmi; - rd_cost->rate = ctx->rate; - rd_cost->dist = ctx->dist; - rd_cost->rdcost = ctx->rdcost; - } else { - mbmi->sb_type = bsize; - mbmi->partition = partition; - } - -#if CONFIG_RD_DEBUG - mbmi->mi_row = mi_row; - mbmi->mi_col = mi_col; -#endif - - for (i = 0; i < num_planes; ++i) { - p[i].coeff = ctx->coeff[i]; - p[i].qcoeff = ctx->qcoeff[i]; - pd[i].dqcoeff = ctx->dqcoeff[i]; - p[i].eobs = ctx->eobs[i]; - p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i]; - } - - for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i]; - - if (!ctx->rd_mode_is_ready) { - ctx->skippable = 0; - - // Set to zero to make sure we do not use the previous encoded frame stats - mbmi->skip = 0; - - // Reset skip mode flag. - mbmi->skip_mode = 0; - } - - x->skip_chroma_rd = - !is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x, - xd->plane[1].subsampling_y); - - if (ctx->rd_mode_is_ready) { - x->skip = ctx->skip; - *x->mbmi_ext = ctx->mbmi_ext; - return; - } - - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - x->source_variance = av1_high_get_sby_perpixel_variance( - cpi, &x->plane[0].src, bsize, xd->bd); - } else { - x->source_variance = - av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); - } - - // Save rdmult before it might be changed, so it can be restored later. - orig_rdmult = x->rdmult; - - if (aq_mode == VARIANCE_AQ) { - if (cpi->vaq_refresh) { - const int energy = bsize <= BLOCK_16X16 - ? x->mb_energy - : av1_log_block_var(cpi, x, bsize); - mbmi->segment_id = energy; - } - x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id); - } else if (aq_mode == COMPLEXITY_AQ) { - x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id); - } else if (aq_mode == CYCLIC_REFRESH_AQ) { - // If segment is boosted, use rdmult for that segment. 
- if (cyclic_refresh_segment_id_boosted(mbmi->segment_id)) - x->rdmult = av1_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); - } - - if (deltaq_mode > 0) x->rdmult = set_deltaq_rdmult(cpi, xd); - - // Find best coding mode & reconstruct the MB so it is available - // as a predictor for MBs that follow in the SB - if (frame_is_intra_only(cm)) { - av1_rd_pick_intra_mode_sb(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx, - best_rd); - } else { - if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { - av1_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, mi_row, mi_col, - rd_cost, bsize, ctx, best_rd); - } else { - av1_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost, - bsize, ctx, best_rd); - } - } - - // Examine the resulting rate and for AQ mode 2 make a segment choice. - if ((rd_cost->rate != INT_MAX) && (aq_mode == COMPLEXITY_AQ) && - (bsize >= BLOCK_16X16) && - (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame || - cpi->refresh_alt2_ref_frame || - (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref))) { - av1_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate); - } - - x->rdmult = orig_rdmult; - - // TODO(jingning) The rate-distortion optimization flow needs to be - // refactored to provide proper exit/return handle. 
- if (rd_cost->rate == INT_MAX) rd_cost->rdcost = INT64_MAX; - - ctx->rate = rd_cost->rate; - ctx->dist = rd_cost->dist; - ctx->rdcost = rd_cost->rdcost; -} - -static void update_inter_mode_stats(FRAME_CONTEXT *fc, FRAME_COUNTS *counts, - PREDICTION_MODE mode, int16_t mode_context, - uint8_t allow_update_cdf) { - (void)counts; - - int16_t mode_ctx = mode_context & NEWMV_CTX_MASK; - if (mode == NEWMV) { -#if CONFIG_ENTROPY_STATS - ++counts->newmv_mode[mode_ctx][0]; -#endif - if (allow_update_cdf) update_cdf(fc->newmv_cdf[mode_ctx], 0, 2); - return; - } else { -#if CONFIG_ENTROPY_STATS - ++counts->newmv_mode[mode_ctx][1]; -#endif - if (allow_update_cdf) update_cdf(fc->newmv_cdf[mode_ctx], 1, 2); - - mode_ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK; - if (mode == GLOBALMV) { -#if CONFIG_ENTROPY_STATS - ++counts->zeromv_mode[mode_ctx][0]; -#endif - if (allow_update_cdf) update_cdf(fc->zeromv_cdf[mode_ctx], 0, 2); - return; - } else { -#if CONFIG_ENTROPY_STATS - ++counts->zeromv_mode[mode_ctx][1]; -#endif - if (allow_update_cdf) update_cdf(fc->zeromv_cdf[mode_ctx], 1, 2); - mode_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK; -#if CONFIG_ENTROPY_STATS - ++counts->refmv_mode[mode_ctx][mode != NEARESTMV]; -#endif - if (allow_update_cdf) - update_cdf(fc->refmv_cdf[mode_ctx], mode != NEARESTMV, 2); - } - } -} - -static void update_palette_cdf(MACROBLOCKD *xd, const MB_MODE_INFO *const mbmi, - FRAME_COUNTS *counts, uint8_t allow_update_cdf) { - FRAME_CONTEXT *fc = xd->tile_ctx; - const BLOCK_SIZE bsize = mbmi->sb_type; - const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; - const int palette_bsize_ctx = av1_get_palette_bsize_ctx(bsize); - - (void)counts; - - if (mbmi->mode == DC_PRED) { - const int n = pmi->palette_size[0]; - const int palette_mode_ctx = av1_get_palette_mode_ctx(xd); - -#if CONFIG_ENTROPY_STATS - ++counts->palette_y_mode[palette_bsize_ctx][palette_mode_ctx][n > 0]; -#endif - if (allow_update_cdf) - 
update_cdf(fc->palette_y_mode_cdf[palette_bsize_ctx][palette_mode_ctx], - n > 0, 2); - if (n > 0) { -#if CONFIG_ENTROPY_STATS - ++counts->palette_y_size[palette_bsize_ctx][n - PALETTE_MIN_SIZE]; -#endif - if (allow_update_cdf) { - update_cdf(fc->palette_y_size_cdf[palette_bsize_ctx], - n - PALETTE_MIN_SIZE, PALETTE_SIZES); - } - } - } - - if (mbmi->uv_mode == UV_DC_PRED) { - const int n = pmi->palette_size[1]; - const int palette_uv_mode_ctx = (pmi->palette_size[0] > 0); - -#if CONFIG_ENTROPY_STATS - ++counts->palette_uv_mode[palette_uv_mode_ctx][n > 0]; -#endif - if (allow_update_cdf) - update_cdf(fc->palette_uv_mode_cdf[palette_uv_mode_ctx], n > 0, 2); - - if (n > 0) { -#if CONFIG_ENTROPY_STATS - ++counts->palette_uv_size[palette_bsize_ctx][n - PALETTE_MIN_SIZE]; -#endif - if (allow_update_cdf) { - update_cdf(fc->palette_uv_size_cdf[palette_bsize_ctx], - n - PALETTE_MIN_SIZE, PALETTE_SIZES); - } - } - } -} - -static void sum_intra_stats(const AV1_COMMON *const cm, FRAME_COUNTS *counts, - MACROBLOCKD *xd, const MB_MODE_INFO *const mbmi, - const MB_MODE_INFO *above_mi, - const MB_MODE_INFO *left_mi, const int intraonly, - const int mi_row, const int mi_col, - uint8_t allow_update_cdf) { - FRAME_CONTEXT *fc = xd->tile_ctx; - const PREDICTION_MODE y_mode = mbmi->mode; - const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode; - (void)counts; - const BLOCK_SIZE bsize = mbmi->sb_type; - - if (intraonly) { -#if CONFIG_ENTROPY_STATS - const PREDICTION_MODE above = av1_above_block_mode(above_mi); - const PREDICTION_MODE left = av1_left_block_mode(left_mi); - const int above_ctx = intra_mode_context[above]; - const int left_ctx = intra_mode_context[left]; - ++counts->kf_y_mode[above_ctx][left_ctx][y_mode]; -#endif // CONFIG_ENTROPY_STATS - if (allow_update_cdf) - update_cdf(get_y_mode_cdf(fc, above_mi, left_mi), y_mode, INTRA_MODES); - } else { -#if CONFIG_ENTROPY_STATS - ++counts->y_mode[size_group_lookup[bsize]][y_mode]; -#endif // CONFIG_ENTROPY_STATS - if (allow_update_cdf) - 
update_cdf(fc->y_mode_cdf[size_group_lookup[bsize]], y_mode, INTRA_MODES); - } - - if (av1_filter_intra_allowed(cm, mbmi)) { - const int use_filter_intra_mode = - mbmi->filter_intra_mode_info.use_filter_intra; -#if CONFIG_ENTROPY_STATS - ++counts->filter_intra[mbmi->sb_type][use_filter_intra_mode]; - if (use_filter_intra_mode) { - ++counts - ->filter_intra_mode[mbmi->filter_intra_mode_info.filter_intra_mode]; - } -#endif // CONFIG_ENTROPY_STATS - if (allow_update_cdf) { - update_cdf(fc->filter_intra_cdfs[mbmi->sb_type], use_filter_intra_mode, - 2); - if (use_filter_intra_mode) { - update_cdf(fc->filter_intra_mode_cdf, - mbmi->filter_intra_mode_info.filter_intra_mode, - FILTER_INTRA_MODES); - } - } - } - if (av1_is_directional_mode(mbmi->mode) && av1_use_angle_delta(bsize)) { -#if CONFIG_ENTROPY_STATS - ++counts->angle_delta[mbmi->mode - V_PRED] - [mbmi->angle_delta[PLANE_TYPE_Y] + MAX_ANGLE_DELTA]; -#endif - if (allow_update_cdf) { - update_cdf(fc->angle_delta_cdf[mbmi->mode - V_PRED], - mbmi->angle_delta[PLANE_TYPE_Y] + MAX_ANGLE_DELTA, - 2 * MAX_ANGLE_DELTA + 1); - } - } - - if (!is_chroma_reference(mi_row, mi_col, bsize, - xd->plane[AOM_PLANE_U].subsampling_x, - xd->plane[AOM_PLANE_U].subsampling_y)) - return; - -#if CONFIG_ENTROPY_STATS - ++counts->uv_mode[is_cfl_allowed(xd)][y_mode][uv_mode]; -#endif // CONFIG_ENTROPY_STATS - if (allow_update_cdf) { - const CFL_ALLOWED_TYPE cfl_allowed = is_cfl_allowed(xd); - update_cdf(fc->uv_mode_cdf[cfl_allowed][y_mode], uv_mode, - UV_INTRA_MODES - !cfl_allowed); - } - if (uv_mode == UV_CFL_PRED) { - const int joint_sign = mbmi->cfl_alpha_signs; - const int idx = mbmi->cfl_alpha_idx; - -#if CONFIG_ENTROPY_STATS - ++counts->cfl_sign[joint_sign]; -#endif - if (allow_update_cdf) - update_cdf(fc->cfl_sign_cdf, joint_sign, CFL_JOINT_SIGNS); - if (CFL_SIGN_U(joint_sign) != CFL_SIGN_ZERO) { - aom_cdf_prob *cdf_u = fc->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)]; - -#if CONFIG_ENTROPY_STATS - 
++counts->cfl_alpha[CFL_CONTEXT_U(joint_sign)][CFL_IDX_U(idx)]; -#endif - if (allow_update_cdf) - update_cdf(cdf_u, CFL_IDX_U(idx), CFL_ALPHABET_SIZE); - } - if (CFL_SIGN_V(joint_sign) != CFL_SIGN_ZERO) { - aom_cdf_prob *cdf_v = fc->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)]; - -#if CONFIG_ENTROPY_STATS - ++counts->cfl_alpha[CFL_CONTEXT_V(joint_sign)][CFL_IDX_V(idx)]; -#endif - if (allow_update_cdf) - update_cdf(cdf_v, CFL_IDX_V(idx), CFL_ALPHABET_SIZE); - } - } - if (av1_is_directional_mode(get_uv_mode(uv_mode)) && - av1_use_angle_delta(bsize)) { -#if CONFIG_ENTROPY_STATS - ++counts->angle_delta[uv_mode - UV_V_PRED] - [mbmi->angle_delta[PLANE_TYPE_UV] + MAX_ANGLE_DELTA]; -#endif - if (allow_update_cdf) { - update_cdf(fc->angle_delta_cdf[uv_mode - UV_V_PRED], - mbmi->angle_delta[PLANE_TYPE_UV] + MAX_ANGLE_DELTA, - 2 * MAX_ANGLE_DELTA + 1); - } - } - if (av1_allow_palette(cm->allow_screen_content_tools, bsize)) - update_palette_cdf(xd, mbmi, counts, allow_update_cdf); -} - -static void update_stats(const AV1_COMMON *const cm, TileDataEnc *tile_data, - ThreadData *td, int mi_row, int mi_col) { - MACROBLOCK *x = &td->mb; - MACROBLOCKD *const xd = &x->e_mbd; - const MB_MODE_INFO *const mbmi = xd->mi[0]; - const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; - const BLOCK_SIZE bsize = mbmi->sb_type; - FRAME_CONTEXT *fc = xd->tile_ctx; - const uint8_t allow_update_cdf = tile_data->allow_update_cdf; - - // delta quant applies to both intra and inter - const int super_block_upper_left = - ((mi_row & (cm->seq_params.mib_size - 1)) == 0) && - ((mi_col & (cm->seq_params.mib_size - 1)) == 0); - - const int seg_ref_active = - segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME); - - if (cm->skip_mode_flag && !seg_ref_active && is_comp_ref_allowed(bsize)) { - const int skip_mode_ctx = av1_get_skip_mode_context(xd); -#if CONFIG_ENTROPY_STATS - td->counts->skip_mode[skip_mode_ctx][mbmi->skip_mode]++; -#endif - if (allow_update_cdf) - 
update_cdf(fc->skip_mode_cdfs[skip_mode_ctx], mbmi->skip_mode, 2); - } - - if (!mbmi->skip_mode) { - if (!seg_ref_active) { - const int skip_ctx = av1_get_skip_context(xd); -#if CONFIG_ENTROPY_STATS - td->counts->skip[skip_ctx][mbmi->skip]++; -#endif - if (allow_update_cdf) update_cdf(fc->skip_cdfs[skip_ctx], mbmi->skip, 2); - } - } - - if (cm->delta_q_present_flag && - (bsize != cm->seq_params.sb_size || !mbmi->skip) && - super_block_upper_left) { -#if CONFIG_ENTROPY_STATS - const int dq = - (mbmi->current_qindex - xd->current_qindex) / cm->delta_q_res; - const int absdq = abs(dq); - for (int i = 0; i < AOMMIN(absdq, DELTA_Q_SMALL); ++i) { - td->counts->delta_q[i][1]++; - } - if (absdq < DELTA_Q_SMALL) td->counts->delta_q[absdq][0]++; -#endif - xd->current_qindex = mbmi->current_qindex; - if (cm->delta_lf_present_flag) { - if (cm->delta_lf_multi) { - const int frame_lf_count = - av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2; - for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) { -#if CONFIG_ENTROPY_STATS - const int delta_lf = - (mbmi->delta_lf[lf_id] - xd->delta_lf[lf_id]) / cm->delta_lf_res; - const int abs_delta_lf = abs(delta_lf); - for (int i = 0; i < AOMMIN(abs_delta_lf, DELTA_LF_SMALL); ++i) { - td->counts->delta_lf_multi[lf_id][i][1]++; - } - if (abs_delta_lf < DELTA_LF_SMALL) - td->counts->delta_lf_multi[lf_id][abs_delta_lf][0]++; -#endif - xd->delta_lf[lf_id] = mbmi->delta_lf[lf_id]; - } - } else { -#if CONFIG_ENTROPY_STATS - const int delta_lf = - (mbmi->delta_lf_from_base - xd->delta_lf_from_base) / - cm->delta_lf_res; - const int abs_delta_lf = abs(delta_lf); - for (int i = 0; i < AOMMIN(abs_delta_lf, DELTA_LF_SMALL); ++i) { - td->counts->delta_lf[i][1]++; - } - if (abs_delta_lf < DELTA_LF_SMALL) - td->counts->delta_lf[abs_delta_lf][0]++; -#endif - xd->delta_lf_from_base = mbmi->delta_lf_from_base; - } - } - } - - if (!is_inter_block(mbmi)) { - sum_intra_stats(cm, td->counts, xd, mbmi, xd->above_mbmi, xd->left_mbmi, - 
frame_is_intra_only(cm), mi_row, mi_col, - tile_data->allow_update_cdf); - } - - if (av1_allow_intrabc(cm)) { - if (allow_update_cdf) - update_cdf(fc->intrabc_cdf, is_intrabc_block(mbmi), 2); -#if CONFIG_ENTROPY_STATS - ++td->counts->intrabc[is_intrabc_block(mbmi)]; -#endif // CONFIG_ENTROPY_STATS - } - - if (!frame_is_intra_only(cm)) { - RD_COUNTS *rdc = &td->rd_counts; - - FRAME_COUNTS *const counts = td->counts; - - if (mbmi->skip_mode) { - rdc->skip_mode_used_flag = 1; - if (cm->reference_mode == REFERENCE_MODE_SELECT) { - assert(has_second_ref(mbmi)); - rdc->compound_ref_used_flag = 1; - } - set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); - return; - } - - const int inter_block = is_inter_block(mbmi); - - if (!seg_ref_active) { -#if CONFIG_ENTROPY_STATS - counts->intra_inter[av1_get_intra_inter_context(xd)][inter_block]++; -#endif - if (allow_update_cdf) { - update_cdf(fc->intra_inter_cdf[av1_get_intra_inter_context(xd)], - inter_block, 2); - } - // If the segment reference feature is enabled we have only a single - // reference frame allowed for the segment so exclude it from - // the reference frame counts used to work out probabilities. - if (inter_block) { - const MV_REFERENCE_FRAME ref0 = mbmi->ref_frame[0]; - const MV_REFERENCE_FRAME ref1 = mbmi->ref_frame[1]; - - av1_collect_neighbors_ref_counts(xd); - - if (cm->reference_mode == REFERENCE_MODE_SELECT) { - if (has_second_ref(mbmi)) - // This flag is also updated for 4x4 blocks - rdc->compound_ref_used_flag = 1; - if (is_comp_ref_allowed(bsize)) { -#if CONFIG_ENTROPY_STATS - counts->comp_inter[av1_get_reference_mode_context(xd)] - [has_second_ref(mbmi)]++; -#endif // CONFIG_ENTROPY_STATS - if (allow_update_cdf) { - update_cdf(av1_get_reference_mode_cdf(xd), has_second_ref(mbmi), - 2); - } - } - } - - if (has_second_ref(mbmi)) { - const COMP_REFERENCE_TYPE comp_ref_type = has_uni_comp_refs(mbmi) - ? 
UNIDIR_COMP_REFERENCE - : BIDIR_COMP_REFERENCE; - if (allow_update_cdf) { - update_cdf(av1_get_comp_reference_type_cdf(xd), comp_ref_type, - COMP_REFERENCE_TYPES); - } -#if CONFIG_ENTROPY_STATS - counts->comp_ref_type[av1_get_comp_reference_type_context(xd)] - [comp_ref_type]++; -#endif // CONFIG_ENTROPY_STATS - - if (comp_ref_type == UNIDIR_COMP_REFERENCE) { - const int bit = (ref0 == BWDREF_FRAME); - if (allow_update_cdf) - update_cdf(av1_get_pred_cdf_uni_comp_ref_p(xd), bit, 2); -#if CONFIG_ENTROPY_STATS - counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p(xd)][0] - [bit]++; -#endif // CONFIG_ENTROPY_STATS - if (!bit) { - const int bit1 = (ref1 == LAST3_FRAME || ref1 == GOLDEN_FRAME); - if (allow_update_cdf) - update_cdf(av1_get_pred_cdf_uni_comp_ref_p1(xd), bit1, 2); -#if CONFIG_ENTROPY_STATS - counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p1(xd)][1] - [bit1]++; -#endif // CONFIG_ENTROPY_STATS - if (bit1) { - if (allow_update_cdf) { - update_cdf(av1_get_pred_cdf_uni_comp_ref_p2(xd), - ref1 == GOLDEN_FRAME, 2); - } -#if CONFIG_ENTROPY_STATS - counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p2(xd)] - [2][ref1 == GOLDEN_FRAME]++; -#endif // CONFIG_ENTROPY_STATS - } - } - } else { - const int bit = (ref0 == GOLDEN_FRAME || ref0 == LAST3_FRAME); - if (allow_update_cdf) - update_cdf(av1_get_pred_cdf_comp_ref_p(xd), bit, 2); -#if CONFIG_ENTROPY_STATS - counts->comp_ref[av1_get_pred_context_comp_ref_p(xd)][0][bit]++; -#endif // CONFIG_ENTROPY_STATS - if (!bit) { - if (allow_update_cdf) { - update_cdf(av1_get_pred_cdf_comp_ref_p1(xd), - ref0 == LAST2_FRAME, 2); - } -#if CONFIG_ENTROPY_STATS - counts->comp_ref[av1_get_pred_context_comp_ref_p1(xd)][1] - [ref0 == LAST2_FRAME]++; -#endif // CONFIG_ENTROPY_STATS - } else { - if (allow_update_cdf) { - update_cdf(av1_get_pred_cdf_comp_ref_p2(xd), - ref0 == GOLDEN_FRAME, 2); - } -#if CONFIG_ENTROPY_STATS - counts->comp_ref[av1_get_pred_context_comp_ref_p2(xd)][2] - [ref0 == GOLDEN_FRAME]++; -#endif // 
CONFIG_ENTROPY_STATS - } - if (allow_update_cdf) { - update_cdf(av1_get_pred_cdf_comp_bwdref_p(xd), - ref1 == ALTREF_FRAME, 2); - } -#if CONFIG_ENTROPY_STATS - counts->comp_bwdref[av1_get_pred_context_comp_bwdref_p(xd)][0] - [ref1 == ALTREF_FRAME]++; -#endif // CONFIG_ENTROPY_STATS - if (ref1 != ALTREF_FRAME) { - if (allow_update_cdf) { - update_cdf(av1_get_pred_cdf_comp_bwdref_p1(xd), - ref1 == ALTREF2_FRAME, 2); - } -#if CONFIG_ENTROPY_STATS - counts->comp_bwdref[av1_get_pred_context_comp_bwdref_p1(xd)][1] - [ref1 == ALTREF2_FRAME]++; -#endif // CONFIG_ENTROPY_STATS - } - } - } else { - const int bit = (ref0 >= BWDREF_FRAME); - if (allow_update_cdf) - update_cdf(av1_get_pred_cdf_single_ref_p1(xd), bit, 2); -#if CONFIG_ENTROPY_STATS - counts->single_ref[av1_get_pred_context_single_ref_p1(xd)][0][bit]++; -#endif // CONFIG_ENTROPY_STATS - if (bit) { - assert(ref0 <= ALTREF_FRAME); - if (allow_update_cdf) { - update_cdf(av1_get_pred_cdf_single_ref_p2(xd), - ref0 == ALTREF_FRAME, 2); - } -#if CONFIG_ENTROPY_STATS - counts->single_ref[av1_get_pred_context_single_ref_p2(xd)][1] - [ref0 == ALTREF_FRAME]++; -#endif // CONFIG_ENTROPY_STATS - if (ref0 != ALTREF_FRAME) { - if (allow_update_cdf) { - update_cdf(av1_get_pred_cdf_single_ref_p6(xd), - ref0 == ALTREF2_FRAME, 2); - } -#if CONFIG_ENTROPY_STATS - counts->single_ref[av1_get_pred_context_single_ref_p6(xd)][5] - [ref0 == ALTREF2_FRAME]++; -#endif // CONFIG_ENTROPY_STATS - } - } else { - const int bit1 = !(ref0 == LAST2_FRAME || ref0 == LAST_FRAME); - if (allow_update_cdf) - update_cdf(av1_get_pred_cdf_single_ref_p3(xd), bit1, 2); -#if CONFIG_ENTROPY_STATS - counts - ->single_ref[av1_get_pred_context_single_ref_p3(xd)][2][bit1]++; -#endif // CONFIG_ENTROPY_STATS - if (!bit1) { - if (allow_update_cdf) { - update_cdf(av1_get_pred_cdf_single_ref_p4(xd), - ref0 != LAST_FRAME, 2); - } -#if CONFIG_ENTROPY_STATS - counts->single_ref[av1_get_pred_context_single_ref_p4(xd)][3] - [ref0 != LAST_FRAME]++; -#endif // 
CONFIG_ENTROPY_STATS - } else { - if (allow_update_cdf) { - update_cdf(av1_get_pred_cdf_single_ref_p5(xd), - ref0 != LAST3_FRAME, 2); - } -#if CONFIG_ENTROPY_STATS - counts->single_ref[av1_get_pred_context_single_ref_p5(xd)][4] - [ref0 != LAST3_FRAME]++; -#endif // CONFIG_ENTROPY_STATS - } - } - } - - if (cm->seq_params.enable_interintra_compound && - is_interintra_allowed(mbmi)) { - const int bsize_group = size_group_lookup[bsize]; - if (mbmi->ref_frame[1] == INTRA_FRAME) { -#if CONFIG_ENTROPY_STATS - counts->interintra[bsize_group][1]++; -#endif - if (allow_update_cdf) - update_cdf(fc->interintra_cdf[bsize_group], 1, 2); -#if CONFIG_ENTROPY_STATS - counts->interintra_mode[bsize_group][mbmi->interintra_mode]++; -#endif - if (allow_update_cdf) { - update_cdf(fc->interintra_mode_cdf[bsize_group], - mbmi->interintra_mode, INTERINTRA_MODES); - } - if (is_interintra_wedge_used(bsize)) { -#if CONFIG_ENTROPY_STATS - counts->wedge_interintra[bsize][mbmi->use_wedge_interintra]++; -#endif - if (allow_update_cdf) { - update_cdf(fc->wedge_interintra_cdf[bsize], - mbmi->use_wedge_interintra, 2); - } - if (mbmi->use_wedge_interintra) { -#if CONFIG_ENTROPY_STATS - counts->wedge_idx[bsize][mbmi->interintra_wedge_index]++; -#endif - if (allow_update_cdf) { - update_cdf(fc->wedge_idx_cdf[bsize], - mbmi->interintra_wedge_index, 16); - } - } - } - } else { -#if CONFIG_ENTROPY_STATS - counts->interintra[bsize_group][0]++; -#endif - if (allow_update_cdf) - update_cdf(fc->interintra_cdf[bsize_group], 0, 2); - } - } - - set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); - const MOTION_MODE motion_allowed = - cm->switchable_motion_mode - ? 
motion_mode_allowed(xd->global_motion, xd, mbmi, - cm->allow_warped_motion) - : SIMPLE_TRANSLATION; - if (mbmi->ref_frame[1] != INTRA_FRAME) { - if (motion_allowed == WARPED_CAUSAL) { -#if CONFIG_ENTROPY_STATS - counts->motion_mode[bsize][mbmi->motion_mode]++; -#endif - if (allow_update_cdf) { - update_cdf(fc->motion_mode_cdf[bsize], mbmi->motion_mode, - MOTION_MODES); - } - } else if (motion_allowed == OBMC_CAUSAL) { -#if CONFIG_ENTROPY_STATS - counts->obmc[bsize][mbmi->motion_mode == OBMC_CAUSAL]++; -#endif - if (allow_update_cdf) { - update_cdf(fc->obmc_cdf[bsize], mbmi->motion_mode == OBMC_CAUSAL, - 2); - } - } - } - - if (has_second_ref(mbmi)) { - assert(cm->reference_mode != SINGLE_REFERENCE && - is_inter_compound_mode(mbmi->mode) && - mbmi->motion_mode == SIMPLE_TRANSLATION); - - const int masked_compound_used = - is_any_masked_compound_used(bsize) && - cm->seq_params.enable_masked_compound; - if (masked_compound_used) { - const int comp_group_idx_ctx = get_comp_group_idx_context(xd); -#if CONFIG_ENTROPY_STATS - ++counts->comp_group_idx[comp_group_idx_ctx][mbmi->comp_group_idx]; -#endif - if (allow_update_cdf) { - update_cdf(fc->comp_group_idx_cdf[comp_group_idx_ctx], - mbmi->comp_group_idx, 2); - } - } - - if (mbmi->comp_group_idx == 0) { - const int comp_index_ctx = get_comp_index_context(cm, xd); -#if CONFIG_ENTROPY_STATS - ++counts->compound_index[comp_index_ctx][mbmi->compound_idx]; -#endif - if (allow_update_cdf) { - update_cdf(fc->compound_index_cdf[comp_index_ctx], - mbmi->compound_idx, 2); - } - } else { - assert(masked_compound_used); - if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) { -#if CONFIG_ENTROPY_STATS - ++counts->compound_type[bsize][mbmi->interinter_comp.type - 1]; -#endif - if (allow_update_cdf) { - update_cdf(fc->compound_type_cdf[bsize], - mbmi->interinter_comp.type - 1, COMPOUND_TYPES - 1); - } - } - } - } - if (mbmi->interinter_comp.type == COMPOUND_WEDGE) { - if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) { -#if 
CONFIG_ENTROPY_STATS - counts->wedge_idx[bsize][mbmi->interinter_comp.wedge_index]++; -#endif - if (allow_update_cdf) { - update_cdf(fc->wedge_idx_cdf[bsize], - mbmi->interinter_comp.wedge_index, 16); - } - } - } - } - } - - if (inter_block && - !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { - int16_t mode_ctx; - const PREDICTION_MODE mode = mbmi->mode; - - mode_ctx = - av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame); - if (has_second_ref(mbmi)) { -#if CONFIG_ENTROPY_STATS - ++counts->inter_compound_mode[mode_ctx][INTER_COMPOUND_OFFSET(mode)]; -#endif - if (allow_update_cdf) - update_cdf(fc->inter_compound_mode_cdf[mode_ctx], - INTER_COMPOUND_OFFSET(mode), INTER_COMPOUND_MODES); - } else { - update_inter_mode_stats(fc, counts, mode, mode_ctx, allow_update_cdf); - } - - int mode_allowed = (mbmi->mode == NEWMV); - mode_allowed |= (mbmi->mode == NEW_NEWMV); - if (mode_allowed) { - uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame); - int idx; - - for (idx = 0; idx < 2; ++idx) { - if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) { -#if CONFIG_ENTROPY_STATS - uint8_t drl_ctx = - av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx); - ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx]; -#endif - - if (mbmi->ref_mv_idx == idx) break; - } - } - } - - if (have_nearmv_in_inter_mode(mbmi->mode)) { - uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame); - int idx; - - for (idx = 1; idx < 3; ++idx) { - if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) { -#if CONFIG_ENTROPY_STATS - uint8_t drl_ctx = - av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx); - ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx - 1]; -#endif - - if (mbmi->ref_mv_idx == idx - 1) break; - } - } - } - } - } -} - -typedef struct { - ENTROPY_CONTEXT a[MAX_MIB_SIZE * MAX_MB_PLANE]; - ENTROPY_CONTEXT l[MAX_MIB_SIZE * MAX_MB_PLANE]; - PARTITION_CONTEXT sa[MAX_MIB_SIZE]; - PARTITION_CONTEXT sl[MAX_MIB_SIZE]; - TXFM_CONTEXT *p_ta; - 
TXFM_CONTEXT *p_tl; - TXFM_CONTEXT ta[MAX_MIB_SIZE]; - TXFM_CONTEXT tl[MAX_MIB_SIZE]; -} RD_SEARCH_MACROBLOCK_CONTEXT; - -static void restore_context(MACROBLOCK *x, - const RD_SEARCH_MACROBLOCK_CONTEXT *ctx, int mi_row, - int mi_col, BLOCK_SIZE bsize, - const int num_planes) { - MACROBLOCKD *xd = &x->e_mbd; - int p; - const int num_4x4_blocks_wide = - block_size_wide[bsize] >> tx_size_wide_log2[0]; - const int num_4x4_blocks_high = - block_size_high[bsize] >> tx_size_high_log2[0]; - int mi_width = mi_size_wide[bsize]; - int mi_height = mi_size_high[bsize]; - for (p = 0; p < num_planes; p++) { - int tx_col = mi_col; - int tx_row = mi_row & MAX_MIB_MASK; - memcpy(xd->above_context[p] + (tx_col >> xd->plane[p].subsampling_x), - ctx->a + num_4x4_blocks_wide * p, - (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> - xd->plane[p].subsampling_x); - memcpy(xd->left_context[p] + (tx_row >> xd->plane[p].subsampling_y), - ctx->l + num_4x4_blocks_high * p, - (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> - xd->plane[p].subsampling_y); - } - memcpy(xd->above_seg_context + mi_col, ctx->sa, - sizeof(*xd->above_seg_context) * mi_width); - memcpy(xd->left_seg_context + (mi_row & MAX_MIB_MASK), ctx->sl, - sizeof(xd->left_seg_context[0]) * mi_height); - xd->above_txfm_context = ctx->p_ta; - xd->left_txfm_context = ctx->p_tl; - memcpy(xd->above_txfm_context, ctx->ta, - sizeof(*xd->above_txfm_context) * mi_width); - memcpy(xd->left_txfm_context, ctx->tl, - sizeof(*xd->left_txfm_context) * mi_height); -} - -static void save_context(const MACROBLOCK *x, RD_SEARCH_MACROBLOCK_CONTEXT *ctx, - int mi_row, int mi_col, BLOCK_SIZE bsize, - const int num_planes) { - const MACROBLOCKD *xd = &x->e_mbd; - int p; - const int num_4x4_blocks_wide = - block_size_wide[bsize] >> tx_size_wide_log2[0]; - const int num_4x4_blocks_high = - block_size_high[bsize] >> tx_size_high_log2[0]; - int mi_width = mi_size_wide[bsize]; - int mi_height = mi_size_high[bsize]; - - // buffer the above/left context 
information of the block in search. - for (p = 0; p < num_planes; ++p) { - int tx_col = mi_col; - int tx_row = mi_row & MAX_MIB_MASK; - memcpy(ctx->a + num_4x4_blocks_wide * p, - xd->above_context[p] + (tx_col >> xd->plane[p].subsampling_x), - (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> - xd->plane[p].subsampling_x); - memcpy(ctx->l + num_4x4_blocks_high * p, - xd->left_context[p] + (tx_row >> xd->plane[p].subsampling_y), - (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> - xd->plane[p].subsampling_y); - } - memcpy(ctx->sa, xd->above_seg_context + mi_col, - sizeof(*xd->above_seg_context) * mi_width); - memcpy(ctx->sl, xd->left_seg_context + (mi_row & MAX_MIB_MASK), - sizeof(xd->left_seg_context[0]) * mi_height); - memcpy(ctx->ta, xd->above_txfm_context, - sizeof(*xd->above_txfm_context) * mi_width); - memcpy(ctx->tl, xd->left_txfm_context, - sizeof(*xd->left_txfm_context) * mi_height); - ctx->p_ta = xd->above_txfm_context; - ctx->p_tl = xd->left_txfm_context; -} - -static void encode_b(const AV1_COMP *const cpi, TileDataEnc *tile_data, - ThreadData *td, TOKENEXTRA **tp, int mi_row, int mi_col, - RUN_TYPE dry_run, BLOCK_SIZE bsize, - PARTITION_TYPE partition, - const PICK_MODE_CONTEXT *const ctx, int *rate) { - TileInfo *const tile = &tile_data->tile_info; - MACROBLOCK *const x = &td->mb; - MACROBLOCKD *xd = &x->e_mbd; - - set_offsets(cpi, tile, x, mi_row, mi_col, bsize); - MB_MODE_INFO *mbmi = xd->mi[0]; - mbmi->partition = partition; - update_state(cpi, tile_data, td, ctx, mi_row, mi_col, bsize, dry_run); - - if (!dry_run) av1_set_coeff_buffer(cpi, x, mi_row, mi_col); - - encode_superblock(cpi, tile_data, td, tp, dry_run, mi_row, mi_col, bsize, - rate); - - if (dry_run == 0) - x->cb_offset += block_size_wide[bsize] * block_size_high[bsize]; - - if (!dry_run) { - if (bsize == cpi->common.seq_params.sb_size && mbmi->skip == 1 && - cpi->common.delta_lf_present_flag) { - const int frame_lf_count = av1_num_planes(&cpi->common) > 1 - ? 
FRAME_LF_COUNT - : FRAME_LF_COUNT - 2; - for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) - mbmi->delta_lf[lf_id] = xd->delta_lf[lf_id]; - mbmi->delta_lf_from_base = xd->delta_lf_from_base; - } - if (has_second_ref(mbmi)) { - if (mbmi->compound_idx == 0 || - mbmi->interinter_comp.type == COMPOUND_AVERAGE) - mbmi->comp_group_idx = 0; - else - mbmi->comp_group_idx = 1; - } - update_stats(&cpi->common, tile_data, td, mi_row, mi_col); - } -} - -static void encode_sb(const AV1_COMP *const cpi, ThreadData *td, - TileDataEnc *tile_data, TOKENEXTRA **tp, int mi_row, - int mi_col, RUN_TYPE dry_run, BLOCK_SIZE bsize, - PC_TREE *pc_tree, int *rate) { - const AV1_COMMON *const cm = &cpi->common; - MACROBLOCK *const x = &td->mb; - MACROBLOCKD *const xd = &x->e_mbd; - const int hbs = mi_size_wide[bsize] / 2; - const int is_partition_root = bsize >= BLOCK_8X8; - const int ctx = is_partition_root - ? partition_plane_context(xd, mi_row, mi_col, bsize) - : -1; - const PARTITION_TYPE partition = pc_tree->partitioning; - const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition); - int quarter_step = mi_size_wide[bsize] / 4; - int i; - BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT); - - if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - - if (!dry_run && ctx >= 0) { - const int has_rows = (mi_row + hbs) < cm->mi_rows; - const int has_cols = (mi_col + hbs) < cm->mi_cols; - - if (has_rows && has_cols) { -#if CONFIG_ENTROPY_STATS - td->counts->partition[ctx][partition]++; -#endif - - if (tile_data->allow_update_cdf) { - FRAME_CONTEXT *fc = xd->tile_ctx; - update_cdf(fc->partition_cdf[ctx], partition, - partition_cdf_length(bsize)); - } - } - } - - switch (partition) { - case PARTITION_NONE: - encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize, - partition, &pc_tree->none, rate); - break; - case PARTITION_VERT: - encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize, - partition, &pc_tree->vertical[0], rate); - if 
(mi_col + hbs < cm->mi_cols) { - encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, subsize, - partition, &pc_tree->vertical[1], rate); - } - break; - case PARTITION_HORZ: - encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize, - partition, &pc_tree->horizontal[0], rate); - if (mi_row + hbs < cm->mi_rows) { - encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, subsize, - partition, &pc_tree->horizontal[1], rate); - } - break; - case PARTITION_SPLIT: - encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, dry_run, subsize, - pc_tree->split[0], rate); - encode_sb(cpi, td, tile_data, tp, mi_row, mi_col + hbs, dry_run, subsize, - pc_tree->split[1], rate); - encode_sb(cpi, td, tile_data, tp, mi_row + hbs, mi_col, dry_run, subsize, - pc_tree->split[2], rate); - encode_sb(cpi, td, tile_data, tp, mi_row + hbs, mi_col + hbs, dry_run, - subsize, pc_tree->split[3], rate); - break; - - case PARTITION_HORZ_A: - encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, bsize2, - partition, &pc_tree->horizontala[0], rate); - encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, bsize2, - partition, &pc_tree->horizontala[1], rate); - encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, subsize, - partition, &pc_tree->horizontala[2], rate); - break; - case PARTITION_HORZ_B: - encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize, - partition, &pc_tree->horizontalb[0], rate); - encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, bsize2, - partition, &pc_tree->horizontalb[1], rate); - encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col + hbs, dry_run, - bsize2, partition, &pc_tree->horizontalb[2], rate); - break; - case PARTITION_VERT_A: - encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, bsize2, - partition, &pc_tree->verticala[0], rate); - encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col, dry_run, bsize2, - partition, &pc_tree->verticala[1], rate); - encode_b(cpi, tile_data, td, tp, 
mi_row, mi_col + hbs, dry_run, subsize, - partition, &pc_tree->verticala[2], rate); - - break; - case PARTITION_VERT_B: - encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize, - partition, &pc_tree->verticalb[0], rate); - encode_b(cpi, tile_data, td, tp, mi_row, mi_col + hbs, dry_run, bsize2, - partition, &pc_tree->verticalb[1], rate); - encode_b(cpi, tile_data, td, tp, mi_row + hbs, mi_col + hbs, dry_run, - bsize2, partition, &pc_tree->verticalb[2], rate); - break; - case PARTITION_HORZ_4: - for (i = 0; i < 4; ++i) { - int this_mi_row = mi_row + i * quarter_step; - if (i > 0 && this_mi_row >= cm->mi_rows) break; - - encode_b(cpi, tile_data, td, tp, this_mi_row, mi_col, dry_run, subsize, - partition, &pc_tree->horizontal4[i], rate); - } - break; - case PARTITION_VERT_4: - for (i = 0; i < 4; ++i) { - int this_mi_col = mi_col + i * quarter_step; - if (i > 0 && this_mi_col >= cm->mi_cols) break; - - encode_b(cpi, tile_data, td, tp, mi_row, this_mi_col, dry_run, subsize, - partition, &pc_tree->vertical4[i], rate); - } - break; - default: assert(0 && "Invalid partition type."); break; - } - - update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize, partition); -} - -// Check to see if the given partition size is allowed for a specified number -// of mi block rows and columns remaining in the image. 
-// If not then return the largest allowed partition size -static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize, int rows_left, - int cols_left, int *bh, int *bw) { - if (rows_left <= 0 || cols_left <= 0) { - return AOMMIN(bsize, BLOCK_8X8); - } else { - for (; bsize > 0; bsize -= 3) { - *bh = mi_size_high[bsize]; - *bw = mi_size_wide[bsize]; - if ((*bh <= rows_left) && (*bw <= cols_left)) { - break; - } - } - } - return bsize; -} - -static void set_partial_sb_partition(const AV1_COMMON *const cm, - MB_MODE_INFO *mi, int bh_in, int bw_in, - int mi_rows_remaining, - int mi_cols_remaining, BLOCK_SIZE bsize, - MB_MODE_INFO **mib) { - int bh = bh_in; - int r, c; - for (r = 0; r < cm->seq_params.mib_size; r += bh) { - int bw = bw_in; - for (c = 0; c < cm->seq_params.mib_size; c += bw) { - const int index = r * cm->mi_stride + c; - mib[index] = mi + index; - mib[index]->sb_type = find_partition_size( - bsize, mi_rows_remaining - r, mi_cols_remaining - c, &bh, &bw); - } - } -} - -// This function attempts to set all mode info entries in a given superblock -// to the same block partition size. -// However, at the bottom and right borders of the image the requested size -// may not be allowed in which case this code attempts to choose the largest -// allowable partition. 
-static void set_fixed_partitioning(AV1_COMP *cpi, const TileInfo *const tile, - MB_MODE_INFO **mib, int mi_row, int mi_col, - BLOCK_SIZE bsize) { - AV1_COMMON *const cm = &cpi->common; - const int mi_rows_remaining = tile->mi_row_end - mi_row; - const int mi_cols_remaining = tile->mi_col_end - mi_col; - int block_row, block_col; - MB_MODE_INFO *const mi_upper_left = cm->mi + mi_row * cm->mi_stride + mi_col; - int bh = mi_size_high[bsize]; - int bw = mi_size_wide[bsize]; - - assert((mi_rows_remaining > 0) && (mi_cols_remaining > 0)); - - // Apply the requested partition size to the SB if it is all "in image" - if ((mi_cols_remaining >= cm->seq_params.mib_size) && - (mi_rows_remaining >= cm->seq_params.mib_size)) { - for (block_row = 0; block_row < cm->seq_params.mib_size; block_row += bh) { - for (block_col = 0; block_col < cm->seq_params.mib_size; - block_col += bw) { - int index = block_row * cm->mi_stride + block_col; - mib[index] = mi_upper_left + index; - mib[index]->sb_type = bsize; - } - } - } else { - // Else this is a partial SB. - set_partial_sb_partition(cm, mi_upper_left, bh, bw, mi_rows_remaining, - mi_cols_remaining, bsize, mib); - } -} - -static void rd_use_partition(AV1_COMP *cpi, ThreadData *td, - TileDataEnc *tile_data, MB_MODE_INFO **mib, - TOKENEXTRA **tp, int mi_row, int mi_col, - BLOCK_SIZE bsize, int *rate, int64_t *dist, - int do_recon, PC_TREE *pc_tree) { - AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - TileInfo *const tile_info = &tile_data->tile_info; - MACROBLOCK *const x = &td->mb; - MACROBLOCKD *const xd = &x->e_mbd; - const int bs = mi_size_wide[bsize]; - const int hbs = bs / 2; - int i; - const int pl = (bsize >= BLOCK_8X8) - ? partition_plane_context(xd, mi_row, mi_col, bsize) - : 0; - const PARTITION_TYPE partition = - (bsize >= BLOCK_8X8) ? 
get_partition(cm, mi_row, mi_col, bsize) - : PARTITION_NONE; - const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition); - RD_SEARCH_MACROBLOCK_CONTEXT x_ctx; - RD_STATS last_part_rdc, none_rdc, chosen_rdc; - BLOCK_SIZE sub_subsize = BLOCK_4X4; - int splits_below = 0; - BLOCK_SIZE bs_type = mib[0]->sb_type; - int do_partition_search = 1; - PICK_MODE_CONTEXT *ctx_none = &pc_tree->none; - - if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - - assert(mi_size_wide[bsize] == mi_size_high[bsize]); - - av1_invalid_rd_stats(&last_part_rdc); - av1_invalid_rd_stats(&none_rdc); - av1_invalid_rd_stats(&chosen_rdc); - - pc_tree->partitioning = partition; - - xd->above_txfm_context = cm->above_txfm_context[tile_info->tile_row] + mi_col; - xd->left_txfm_context = - xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); - save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); - - if (bsize == BLOCK_16X16 && cpi->vaq_refresh) { - set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); - x->mb_energy = av1_log_block_var(cpi, x, bsize); - } - - if (do_partition_search && - cpi->sf.partition_search_type == SEARCH_PARTITION && - cpi->sf.adjust_partitioning_from_last_frame) { - // Check if any of the sub blocks are further split. - if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) { - sub_subsize = get_partition_subsize(subsize, PARTITION_SPLIT); - splits_below = 1; - for (i = 0; i < 4; i++) { - int jj = i >> 1, ii = i & 0x01; - MB_MODE_INFO *this_mi = mib[jj * hbs * cm->mi_stride + ii * hbs]; - if (this_mi && this_mi->sb_type >= sub_subsize) { - splits_below = 0; - } - } - } - - // If partition is not none try none unless each of the 4 splits are split - // even further.. 
- if (partition != PARTITION_NONE && !splits_below && - mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols) { - pc_tree->partitioning = PARTITION_NONE; - rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc, - PARTITION_NONE, bsize, ctx_none, INT64_MAX); - - if (none_rdc.rate < INT_MAX) { - none_rdc.rate += x->partition_cost[pl][PARTITION_NONE]; - none_rdc.rdcost = RDCOST(x->rdmult, none_rdc.rate, none_rdc.dist); - } - - restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); - mib[0]->sb_type = bs_type; - pc_tree->partitioning = partition; - } - } - for (int b = 0; b < 2; ++b) { - pc_tree->horizontal[b].skip_ref_frame_mask = 0; - pc_tree->vertical[b].skip_ref_frame_mask = 0; - } - for (int b = 0; b < 3; ++b) { - pc_tree->horizontala[b].skip_ref_frame_mask = 0; - pc_tree->horizontalb[b].skip_ref_frame_mask = 0; - pc_tree->verticala[b].skip_ref_frame_mask = 0; - pc_tree->verticalb[b].skip_ref_frame_mask = 0; - } - for (int b = 0; b < 4; ++b) { - pc_tree->horizontal4[b].skip_ref_frame_mask = 0; - pc_tree->vertical4[b].skip_ref_frame_mask = 0; - } - switch (partition) { - case PARTITION_NONE: - rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, - PARTITION_NONE, bsize, ctx_none, INT64_MAX); - break; - case PARTITION_HORZ: - rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, - PARTITION_HORZ, subsize, &pc_tree->horizontal[0], - INT64_MAX); - if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && - mi_row + hbs < cm->mi_rows) { - RD_STATS tmp_rdc; - const PICK_MODE_CONTEXT *const ctx_h = &pc_tree->horizontal[0]; - av1_init_rd_stats(&tmp_rdc); - update_state(cpi, tile_data, td, ctx_h, mi_row, mi_col, subsize, 1); - encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row, - mi_col, subsize, NULL); - rd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &tmp_rdc, - PARTITION_HORZ, subsize, &pc_tree->horizontal[1], - INT64_MAX); - if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { - 
av1_invalid_rd_stats(&last_part_rdc); - break; - } - last_part_rdc.rate += tmp_rdc.rate; - last_part_rdc.dist += tmp_rdc.dist; - last_part_rdc.rdcost += tmp_rdc.rdcost; - } - break; - case PARTITION_VERT: - rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, - PARTITION_VERT, subsize, &pc_tree->vertical[0], - INT64_MAX); - if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && - mi_col + hbs < cm->mi_cols) { - RD_STATS tmp_rdc; - const PICK_MODE_CONTEXT *const ctx_v = &pc_tree->vertical[0]; - av1_init_rd_stats(&tmp_rdc); - update_state(cpi, tile_data, td, ctx_v, mi_row, mi_col, subsize, 1); - encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row, - mi_col, subsize, NULL); - rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &tmp_rdc, - PARTITION_VERT, subsize, - &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX); - if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { - av1_invalid_rd_stats(&last_part_rdc); - break; - } - last_part_rdc.rate += tmp_rdc.rate; - last_part_rdc.dist += tmp_rdc.dist; - last_part_rdc.rdcost += tmp_rdc.rdcost; - } - break; - case PARTITION_SPLIT: - last_part_rdc.rate = 0; - last_part_rdc.dist = 0; - last_part_rdc.rdcost = 0; - for (i = 0; i < 4; i++) { - int x_idx = (i & 1) * hbs; - int y_idx = (i >> 1) * hbs; - int jj = i >> 1, ii = i & 0x01; - RD_STATS tmp_rdc; - if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) - continue; - - av1_init_rd_stats(&tmp_rdc); - rd_use_partition(cpi, td, tile_data, - mib + jj * hbs * cm->mi_stride + ii * hbs, tp, - mi_row + y_idx, mi_col + x_idx, subsize, &tmp_rdc.rate, - &tmp_rdc.dist, i != 3, pc_tree->split[i]); - if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { - av1_invalid_rd_stats(&last_part_rdc); - break; - } - last_part_rdc.rate += tmp_rdc.rate; - last_part_rdc.dist += tmp_rdc.dist; - } - break; - case PARTITION_VERT_A: - case PARTITION_VERT_B: - case PARTITION_HORZ_A: - case PARTITION_HORZ_B: - case PARTITION_HORZ_4: - case 
PARTITION_VERT_4: - assert(0 && "Cannot handle extended partition types"); - default: assert(0); break; - } - - if (last_part_rdc.rate < INT_MAX) { - last_part_rdc.rate += x->partition_cost[pl][partition]; - last_part_rdc.rdcost = - RDCOST(x->rdmult, last_part_rdc.rate, last_part_rdc.dist); - } - - if (do_partition_search && cpi->sf.adjust_partitioning_from_last_frame && - cpi->sf.partition_search_type == SEARCH_PARTITION && - partition != PARTITION_SPLIT && bsize > BLOCK_8X8 && - (mi_row + bs < cm->mi_rows || mi_row + hbs == cm->mi_rows) && - (mi_col + bs < cm->mi_cols || mi_col + hbs == cm->mi_cols)) { - BLOCK_SIZE split_subsize = get_partition_subsize(bsize, PARTITION_SPLIT); - chosen_rdc.rate = 0; - chosen_rdc.dist = 0; - - restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); - pc_tree->partitioning = PARTITION_SPLIT; - - // Split partition. - for (i = 0; i < 4; i++) { - int x_idx = (i & 1) * hbs; - int y_idx = (i >> 1) * hbs; - RD_STATS tmp_rdc; - - if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) - continue; - - save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); - pc_tree->split[i]->partitioning = PARTITION_NONE; - rd_pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx, - &tmp_rdc, PARTITION_SPLIT, split_subsize, - &pc_tree->split[i]->none, INT64_MAX); - - restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); - if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { - av1_invalid_rd_stats(&chosen_rdc); - break; - } - - chosen_rdc.rate += tmp_rdc.rate; - chosen_rdc.dist += tmp_rdc.dist; - - if (i != 3) - encode_sb(cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx, - OUTPUT_ENABLED, split_subsize, pc_tree->split[i], NULL); - - chosen_rdc.rate += x->partition_cost[pl][PARTITION_NONE]; - } - if (chosen_rdc.rate < INT_MAX) { - chosen_rdc.rate += x->partition_cost[pl][PARTITION_SPLIT]; - chosen_rdc.rdcost = RDCOST(x->rdmult, chosen_rdc.rate, chosen_rdc.dist); - } - } - - // If last_part is 
better set the partitioning to that. - if (last_part_rdc.rdcost < chosen_rdc.rdcost) { - mib[0]->sb_type = bsize; - if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition; - chosen_rdc = last_part_rdc; - } - // If none was better set the partitioning to that. - if (none_rdc.rdcost < chosen_rdc.rdcost) { - if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; - chosen_rdc = none_rdc; - } - - restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); - - // We must have chosen a partitioning and encoding or we'll fail later on. - // No other opportunities for success. - if (bsize == cm->seq_params.sb_size) - assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX); - - if (do_recon) { - if (bsize == cm->seq_params.sb_size) { - // NOTE: To get estimate for rate due to the tokens, use: - // int rate_coeffs = 0; - // encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_COSTCOEFFS, - // bsize, pc_tree, &rate_coeffs); - x->cb_offset = 0; - encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize, - pc_tree, NULL); - } else { - encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize, - pc_tree, NULL); - } - } - - *rate = chosen_rdc.rate; - *dist = chosen_rdc.dist; -} - -/* clang-format off */ -static const BLOCK_SIZE min_partition_size[BLOCK_SIZES_ALL] = { - BLOCK_4X4, // 4x4 - BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, // 4x8, 8x4, 8x8 - BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, // 8x16, 16x8, 16x16 - BLOCK_8X8, BLOCK_8X8, BLOCK_16X16, // 16x32, 32x16, 32x32 - BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, // 32x64, 64x32, 64x64 - BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, // 64x128, 128x64, 128x128 - BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, // 4x16, 16x4, 8x32 - BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, // 32x8, 16x64, 64x16 -}; - -static const BLOCK_SIZE max_partition_size[BLOCK_SIZES_ALL] = { - BLOCK_8X8, // 4x4 - BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, // 4x8, 8x4, 8x8 - BLOCK_32X32, BLOCK_32X32, BLOCK_32X32, // 8x16, 16x8, 16x16 - BLOCK_64X64, 
BLOCK_64X64, BLOCK_64X64, // 16x32, 32x16, 32x32 - BLOCK_LARGEST, BLOCK_LARGEST, BLOCK_LARGEST, // 32x64, 64x32, 64x64 - BLOCK_LARGEST, BLOCK_LARGEST, BLOCK_LARGEST, // 64x128, 128x64, 128x128 - BLOCK_16X16, BLOCK_16X16, BLOCK_32X32, // 4x16, 16x4, 8x32 - BLOCK_32X32, BLOCK_LARGEST, BLOCK_LARGEST, // 32x8, 16x64, 64x16 -}; - -// Next square block size less or equal than current block size. -static const BLOCK_SIZE next_square_size[BLOCK_SIZES_ALL] = { - BLOCK_4X4, // 4x4 - BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, // 4x8, 8x4, 8x8 - BLOCK_8X8, BLOCK_8X8, BLOCK_16X16, // 8x16, 16x8, 16x16 - BLOCK_16X16, BLOCK_16X16, BLOCK_32X32, // 16x32, 32x16, 32x32 - BLOCK_32X32, BLOCK_32X32, BLOCK_64X64, // 32x64, 64x32, 64x64 - BLOCK_64X64, BLOCK_64X64, BLOCK_128X128, // 64x128, 128x64, 128x128 - BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, // 4x16, 16x4, 8x32 - BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, // 32x8, 16x64, 64x16 -}; -/* clang-format on */ - -// Look at all the mode_info entries for blocks that are part of this -// partition and find the min and max values for sb_type. -// At the moment this is designed to work on a superblock but could be -// adjusted to use a size parameter. -// -// The min and max are assumed to have been initialized prior to calling this -// function so repeat calls can accumulate a min and max of more than one -// superblock. -static void get_sb_partition_size_range(const AV1_COMMON *const cm, - MACROBLOCKD *xd, MB_MODE_INFO **mib, - BLOCK_SIZE *min_block_size, - BLOCK_SIZE *max_block_size) { - int i, j; - int index = 0; - - // Check the sb_type for each block that belongs to this region. - for (i = 0; i < cm->seq_params.mib_size; ++i) { - for (j = 0; j < cm->seq_params.mib_size; ++j) { - MB_MODE_INFO *mi = mib[index + j]; - BLOCK_SIZE sb_type = mi ? 
mi->sb_type : BLOCK_4X4; - *min_block_size = AOMMIN(*min_block_size, sb_type); - *max_block_size = AOMMAX(*max_block_size, sb_type); - } - index += xd->mi_stride; - } -} - -// Checks to see if a super block is on a horizontal image edge. -// In most cases this is the "real" edge unless there are formatting -// bars embedded in the stream. -static int active_h_edge(const AV1_COMP *cpi, int mi_row, int mi_step) { - int top_edge = 0; - int bottom_edge = cpi->common.mi_rows; - int is_active_h_edge = 0; - - // For two pass account for any formatting bars detected. - if (cpi->oxcf.pass == 2) { - const TWO_PASS *const twopass = &cpi->twopass; - - // The inactive region is specified in MBs not mi units. - // The image edge is in the following MB row. - top_edge += (int)(twopass->this_frame_stats.inactive_zone_rows * 2); - - bottom_edge -= (int)(twopass->this_frame_stats.inactive_zone_rows * 2); - bottom_edge = AOMMAX(top_edge, bottom_edge); - } - - if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) || - ((bottom_edge >= mi_row) && (bottom_edge < (mi_row + mi_step)))) { - is_active_h_edge = 1; - } - return is_active_h_edge; -} - -// Checks to see if a super block is on a vertical image edge. -// In most cases this is the "real" edge unless there are formatting -// bars embedded in the stream. -static int active_v_edge(const AV1_COMP *cpi, int mi_col, int mi_step) { - int left_edge = 0; - int right_edge = cpi->common.mi_cols; - int is_active_v_edge = 0; - - // For two pass account for any formatting bars detected. - if (cpi->oxcf.pass == 2) { - const TWO_PASS *const twopass = &cpi->twopass; - - // The inactive region is specified in MBs not mi units. - // The image edge is in the following MB row. 
- left_edge += (int)(twopass->this_frame_stats.inactive_zone_cols * 2); - - right_edge -= (int)(twopass->this_frame_stats.inactive_zone_cols * 2); - right_edge = AOMMAX(left_edge, right_edge); - } - - if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) || - ((right_edge >= mi_col) && (right_edge < (mi_col + mi_step)))) { - is_active_v_edge = 1; - } - return is_active_v_edge; -} - -// Checks to see if a super block is at the edge of the active image. -// In most cases this is the "real" edge unless there are formatting -// bars embedded in the stream. -static int active_edge_sb(const AV1_COMP *cpi, int mi_row, int mi_col) { - return active_h_edge(cpi, mi_row, cpi->common.seq_params.mib_size) || - active_v_edge(cpi, mi_col, cpi->common.seq_params.mib_size); -} - -// Look at neighboring blocks and set a min and max partition size based on -// what they chose. -static void rd_auto_partition_range(AV1_COMP *cpi, const TileInfo *const tile, - MACROBLOCKD *const xd, int mi_row, - int mi_col, BLOCK_SIZE *min_block_size, - BLOCK_SIZE *max_block_size) { - AV1_COMMON *const cm = &cpi->common; - MB_MODE_INFO **mi = xd->mi; - const int left_in_image = xd->left_available && mi[-1]; - const int above_in_image = xd->up_available && mi[-xd->mi_stride]; - const int mi_rows_remaining = tile->mi_row_end - mi_row; - const int mi_cols_remaining = tile->mi_col_end - mi_col; - int bh, bw; - BLOCK_SIZE min_size = BLOCK_4X4; - BLOCK_SIZE max_size = BLOCK_LARGEST; - - // Trap case where we do not have a prediction. - if (left_in_image || above_in_image || cm->frame_type != KEY_FRAME) { - // Default "min to max" and "max to min" - min_size = BLOCK_LARGEST; - max_size = BLOCK_4X4; - - // NOTE: each call to get_sb_partition_size_range() uses the previous - // passed in values for min and max as a starting point. 
- // Find the min and max partition used in previous frame at this location - if (cm->frame_type != KEY_FRAME) { - MB_MODE_INFO **prev_mi = - &cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col]; - get_sb_partition_size_range(cm, xd, prev_mi, &min_size, &max_size); - } - // Find the min and max partition sizes used in the left superblock - if (left_in_image) { - MB_MODE_INFO **left_sb_mi = &mi[-cm->seq_params.mib_size]; - get_sb_partition_size_range(cm, xd, left_sb_mi, &min_size, &max_size); - } - // Find the min and max partition sizes used in the above suprblock. - if (above_in_image) { - MB_MODE_INFO **above_sb_mi = - &mi[-xd->mi_stride * cm->seq_params.mib_size]; - get_sb_partition_size_range(cm, xd, above_sb_mi, &min_size, &max_size); - } - - // Adjust observed min and max for "relaxed" auto partition case. - if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) { - min_size = min_partition_size[min_size]; - max_size = max_partition_size[max_size]; - } - } - - // Check border cases where max and min from neighbors may not be legal. - max_size = find_partition_size(max_size, mi_rows_remaining, mi_cols_remaining, - &bh, &bw); - min_size = AOMMIN(min_size, max_size); - - // Test for blocks at the edge of the active image. - // This may be the actual edge of the image or where there are formatting - // bars. - if (active_edge_sb(cpi, mi_row, mi_col)) { - min_size = BLOCK_4X4; - } else { - min_size = AOMMIN(cpi->sf.rd_auto_partition_min_limit, min_size); - } - - // When use_square_partition_only is true, make sure at least one square - // partition is allowed by selecting the next smaller square size as - // *min_block_size. 
- if (min_size >= cpi->sf.use_square_partition_only_threshold) { - min_size = AOMMIN(min_size, next_square_size[max_size]); - } - - *min_block_size = AOMMIN(min_size, cm->seq_params.sb_size); - *max_block_size = AOMMIN(max_size, cm->seq_params.sb_size); -} - -// TODO(jingning) refactor functions setting partition search range -static void set_partition_range(const AV1_COMMON *const cm, - const MACROBLOCKD *const xd, int mi_row, - int mi_col, BLOCK_SIZE bsize, - BLOCK_SIZE *const min_bs, - BLOCK_SIZE *const max_bs) { - const int mi_width = mi_size_wide[bsize]; - const int mi_height = mi_size_high[bsize]; - int idx, idy; - - const int idx_str = cm->mi_stride * mi_row + mi_col; - MB_MODE_INFO **const prev_mi = &cm->prev_mi_grid_visible[idx_str]; - BLOCK_SIZE min_size = cm->seq_params.sb_size; // default values - BLOCK_SIZE max_size = BLOCK_4X4; - - if (prev_mi) { - for (idy = 0; idy < mi_height; ++idy) { - for (idx = 0; idx < mi_width; ++idx) { - const MB_MODE_INFO *const mi = prev_mi[idy * cm->mi_stride + idx]; - const BLOCK_SIZE bs = mi ? mi->sb_type : bsize; - min_size = AOMMIN(min_size, bs); - max_size = AOMMAX(max_size, bs); - } - } - } - - if (xd->left_available) { - for (idy = 0; idy < mi_height; ++idy) { - const MB_MODE_INFO *const mi = xd->mi[idy * cm->mi_stride - 1]; - const BLOCK_SIZE bs = mi ? mi->sb_type : bsize; - min_size = AOMMIN(min_size, bs); - max_size = AOMMAX(max_size, bs); - } - } - - if (xd->up_available) { - for (idx = 0; idx < mi_width; ++idx) { - const MB_MODE_INFO *const mi = xd->mi[idx - cm->mi_stride]; - const BLOCK_SIZE bs = mi ? 
mi->sb_type : bsize; - min_size = AOMMIN(min_size, bs); - max_size = AOMMAX(max_size, bs); - } - } - - if (min_size == max_size) { - min_size = min_partition_size[min_size]; - max_size = max_partition_size[max_size]; - } - - *min_bs = AOMMIN(min_size, cm->seq_params.sb_size); - *max_bs = AOMMIN(max_size, cm->seq_params.sb_size); -} - -static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { - memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv)); -} - -static INLINE void load_pred_mv(MACROBLOCK *x, - const PICK_MODE_CONTEXT *const ctx) { - memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv)); -} - -#if CONFIG_FP_MB_STATS -const int qindex_skip_threshold_lookup[BLOCK_SIZES] = { - 0, 10, 10, 30, 40, 40, 60, 80, 80, 90, 100, 100, 120, - // TODO(debargha): What are the correct numbers here? - 130, 130, 150 -}; -const int qindex_split_threshold_lookup[BLOCK_SIZES] = { - 0, 3, 3, 7, 15, 15, 30, 40, 40, 60, 80, 80, 120, - // TODO(debargha): What are the correct numbers here? - 160, 160, 240 -}; -const int complexity_16x16_blocks_threshold[BLOCK_SIZES] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 6, - // TODO(debargha): What are the correct numbers here? - 8, 8, 10 -}; - -typedef enum { - MV_ZERO = 0, - MV_LEFT = 1, - MV_UP = 2, - MV_RIGHT = 3, - MV_DOWN = 4, - MV_INVALID -} MOTION_DIRECTION; - -static INLINE MOTION_DIRECTION get_motion_direction_fp(uint8_t fp_byte) { - if (fp_byte & FPMB_MOTION_ZERO_MASK) { - return MV_ZERO; - } else if (fp_byte & FPMB_MOTION_LEFT_MASK) { - return MV_LEFT; - } else if (fp_byte & FPMB_MOTION_RIGHT_MASK) { - return MV_RIGHT; - } else if (fp_byte & FPMB_MOTION_UP_MASK) { - return MV_UP; - } else { - return MV_DOWN; - } -} - -static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv, - MOTION_DIRECTION that_mv) { - if (this_mv == that_mv) { - return 0; - } else { - return abs(this_mv - that_mv) == 2 ? 2 : 1; - } -} -#endif - -// Try searching for an encoding for the given subblock. 
Returns zero if the -// rdcost is already too high (to tell the caller not to bother searching for -// encodings of further subblocks) -static int rd_try_subblock(AV1_COMP *const cpi, ThreadData *td, - TileDataEnc *tile_data, TOKENEXTRA **tp, int is_last, - int mi_row, int mi_col, BLOCK_SIZE subsize, - RD_STATS *best_rdc, RD_STATS *sum_rdc, - RD_STATS *this_rdc, PARTITION_TYPE partition, - PICK_MODE_CONTEXT *prev_ctx, - PICK_MODE_CONTEXT *this_ctx) { -#define RTS_X_RATE_NOCOEF_ARG -#define RTS_MAX_RDCOST best_rdc->rdcost - - MACROBLOCK *const x = &td->mb; - - if (cpi->sf.adaptive_motion_search) load_pred_mv(x, prev_ctx); - - const int64_t rdcost_remaining = best_rdc->rdcost == INT64_MAX - ? INT64_MAX - : (best_rdc->rdcost - sum_rdc->rdcost); - - rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, this_rdc, - RTS_X_RATE_NOCOEF_ARG partition, subsize, this_ctx, - rdcost_remaining); - - if (this_rdc->rate == INT_MAX) { - sum_rdc->rdcost = INT64_MAX; - } else { - sum_rdc->rate += this_rdc->rate; - sum_rdc->dist += this_rdc->dist; - sum_rdc->rdcost += this_rdc->rdcost; - } - - if (sum_rdc->rdcost >= RTS_MAX_RDCOST) return 0; - - if (!is_last) { - update_state(cpi, tile_data, td, this_ctx, mi_row, mi_col, subsize, 1); - encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, - subsize, NULL); - } - - return 1; - -#undef RTS_X_RATE_NOCOEF_ARG -#undef RTS_MAX_RDCOST -} - -static void rd_test_partition3(AV1_COMP *const cpi, ThreadData *td, - TileDataEnc *tile_data, TOKENEXTRA **tp, - PC_TREE *pc_tree, RD_STATS *best_rdc, - PICK_MODE_CONTEXT ctxs[3], - PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col, - BLOCK_SIZE bsize, PARTITION_TYPE partition, - int mi_row0, int mi_col0, BLOCK_SIZE subsize0, - int mi_row1, int mi_col1, BLOCK_SIZE subsize1, - int mi_row2, int mi_col2, BLOCK_SIZE subsize2) { - MACROBLOCK *const x = &td->mb; - MACROBLOCKD *const xd = &x->e_mbd; - RD_STATS sum_rdc, this_rdc; -#define RTP_STX_TRY_ARGS - int pl = partition_plane_context(xd, 
mi_row, mi_col, bsize); - av1_init_rd_stats(&sum_rdc); - sum_rdc.rate = x->partition_cost[pl][partition]; - sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0); - if (!rd_try_subblock(cpi, td, tile_data, tp, 0, mi_row0, mi_col0, subsize0, - best_rdc, &sum_rdc, &this_rdc, - RTP_STX_TRY_ARGS partition, ctx, &ctxs[0])) - return; - - if (!rd_try_subblock(cpi, td, tile_data, tp, 0, mi_row1, mi_col1, subsize1, - best_rdc, &sum_rdc, &this_rdc, - RTP_STX_TRY_ARGS partition, &ctxs[0], &ctxs[1])) - return; - - // With the new layout of mixed partitions for PARTITION_HORZ_B and - // PARTITION_VERT_B, the last subblock might start past halfway through the - // main block, so we might signal it even though the subblock lies strictly - // outside the image. In that case, we won't spend any bits coding it and the - // difference (obviously) doesn't contribute to the error. - const int try_block2 = 1; - if (try_block2 && - !rd_try_subblock(cpi, td, tile_data, tp, 1, mi_row2, mi_col2, subsize2, - best_rdc, &sum_rdc, &this_rdc, - RTP_STX_TRY_ARGS partition, &ctxs[1], &ctxs[2])) - return; - - if (sum_rdc.rdcost >= best_rdc->rdcost) return; - - sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); - - if (sum_rdc.rdcost >= best_rdc->rdcost) return; - - *best_rdc = sum_rdc; - pc_tree->partitioning = partition; - -#undef RTP_STX_TRY_ARGS -} - -static void reset_partition(PC_TREE *pc_tree, BLOCK_SIZE bsize) { - pc_tree->partitioning = PARTITION_NONE; - pc_tree->cb_search_range = SEARCH_FULL_PLANE; - pc_tree->none.skip = 0; - - if (bsize >= BLOCK_8X8) { - BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT); - for (int idx = 0; idx < 4; ++idx) - reset_partition(pc_tree->split[idx], subsize); - } -} - -static void rd_pick_sqr_partition(AV1_COMP *const cpi, ThreadData *td, - TileDataEnc *tile_data, TOKENEXTRA **tp, - int mi_row, int mi_col, BLOCK_SIZE bsize, - RD_STATS *rd_cost, int64_t best_rd, - PC_TREE *pc_tree, int64_t *none_rd) { - const AV1_COMMON *const cm = 
&cpi->common; - TileInfo *const tile_info = &tile_data->tile_info; - MACROBLOCK *const x = &td->mb; - MACROBLOCKD *const xd = &x->e_mbd; - const int mi_step = mi_size_wide[bsize] / 2; - RD_SEARCH_MACROBLOCK_CONTEXT x_ctx; - const TOKENEXTRA *const tp_orig = *tp; - PICK_MODE_CONTEXT *ctx_none = &pc_tree->none; - int tmp_partition_cost[PARTITION_TYPES]; - BLOCK_SIZE subsize; - RD_STATS this_rdc, sum_rdc, best_rdc, pn_rdc; - const int bsize_at_least_8x8 = (bsize >= BLOCK_8X8); - int do_square_split = bsize_at_least_8x8; - const int pl = bsize_at_least_8x8 - ? partition_plane_context(xd, mi_row, mi_col, bsize) - : 0; - const int *partition_cost = - pl >= 0 ? x->partition_cost[pl] : x->partition_cost[0]; - const int num_planes = av1_num_planes(cm); - - int64_t split_rd[4] = { 0, 0, 0, 0 }; - - // Override skipping rectangular partition operations for edge blocks - const int has_rows = (mi_row + mi_step < cm->mi_rows); - const int has_cols = (mi_col + mi_step < cm->mi_cols); - - if (none_rd) *none_rd = 0; - - int partition_none_allowed = has_rows && has_cols; - - (void)*tp_orig; - (void)split_rd; - - if (best_rd < 0) { - pc_tree->none.rdcost = INT64_MAX; - pc_tree->none.skip = 0; - av1_invalid_rd_stats(rd_cost); - return; - } - pc_tree->pc_tree_stats.valid = 1; - - // Override partition costs at the edges of the frame in the same - // way as in read_partition (see decodeframe.c) - if (!(has_rows && has_cols)) { - assert(bsize_at_least_8x8 && pl >= 0); - const aom_cdf_prob *partition_cdf = cm->fc->partition_cdf[pl]; - for (int i = 0; i < PARTITION_TYPES; ++i) tmp_partition_cost[i] = INT_MAX; - if (has_cols) { - // At the bottom, the two possibilities are HORZ and SPLIT - aom_cdf_prob bot_cdf[2]; - partition_gather_vert_alike(bot_cdf, partition_cdf, bsize); - static const int bot_inv_map[2] = { PARTITION_HORZ, PARTITION_SPLIT }; - av1_cost_tokens_from_cdf(tmp_partition_cost, bot_cdf, bot_inv_map); - } else if (has_rows) { - // At the right, the two possibilities are VERT 
and SPLIT - aom_cdf_prob rhs_cdf[2]; - partition_gather_horz_alike(rhs_cdf, partition_cdf, bsize); - static const int rhs_inv_map[2] = { PARTITION_VERT, PARTITION_SPLIT }; - av1_cost_tokens_from_cdf(tmp_partition_cost, rhs_cdf, rhs_inv_map); - } else { - // At the bottom right, we always split - tmp_partition_cost[PARTITION_SPLIT] = 0; - } - - partition_cost = tmp_partition_cost; - } - -#ifndef NDEBUG - // Nothing should rely on the default value of this array (which is just - // leftover from encoding the previous block. Setting it to fixed pattern - // when debugging. - // bit 0, 1, 2 are blk_skip of each plane - // bit 4, 5, 6 are initialization checking of each plane - memset(x->blk_skip, 0x77, sizeof(x->blk_skip)); -#endif // NDEBUG - - assert(mi_size_wide[bsize] == mi_size_high[bsize]); - - av1_init_rd_stats(&this_rdc); - av1_init_rd_stats(&sum_rdc); - av1_invalid_rd_stats(&best_rdc); - best_rdc.rdcost = best_rd; - - set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); - - if (bsize == BLOCK_16X16 && cpi->vaq_refresh) - x->mb_energy = av1_log_block_var(cpi, x, bsize); - - xd->above_txfm_context = cm->above_txfm_context[tile_info->tile_row] + mi_col; - xd->left_txfm_context = - xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); - save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); - -#if CONFIG_DIST_8X8 - if (x->using_dist_8x8) { - if (block_size_high[bsize] <= 8 || block_size_wide[bsize] <= 8) - do_square_split = 0; - } -#endif - - // PARTITION_NONE - if (partition_none_allowed) { - int pt_cost = 0; - if (bsize_at_least_8x8) { - pc_tree->partitioning = PARTITION_NONE; - pt_cost = partition_cost[PARTITION_NONE] < INT_MAX - ? partition_cost[PARTITION_NONE] - : 0; - } - int64_t partition_rd_cost = RDCOST(x->rdmult, pt_cost, 0); - int64_t best_remain_rdcost = best_rdc.rdcost == INT64_MAX - ? 
INT64_MAX - : (best_rdc.rdcost - partition_rd_cost); - rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, - PARTITION_NONE, bsize, ctx_none, best_remain_rdcost); - - pc_tree->pc_tree_stats.rdcost = ctx_none->rdcost; - pc_tree->pc_tree_stats.skip = ctx_none->skip; - - if (none_rd) *none_rd = this_rdc.rdcost; - if (this_rdc.rate != INT_MAX) { - if (bsize_at_least_8x8) { - this_rdc.rate += pt_cost; - this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist); - } - - if (this_rdc.rdcost < best_rdc.rdcost) { - // Adjust dist breakout threshold according to the partition size. - const int64_t dist_breakout_thr = - cpi->sf.partition_search_breakout_dist_thr >> - ((2 * (MAX_SB_SIZE_LOG2 - 2)) - - (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize])); - const int rate_breakout_thr = - cpi->sf.partition_search_breakout_rate_thr * - num_pels_log2_lookup[bsize]; - - best_rdc = this_rdc; - if (bsize_at_least_8x8) pc_tree->partitioning = PARTITION_NONE; - - pc_tree->cb_search_range = SEARCH_FULL_PLANE; - - // If all y, u, v transform blocks in this partition are skippable, and - // the dist & rate are within the thresholds, the partition search is - // terminated for current branch of the partition search tree. - // The dist & rate thresholds are set to 0 at speed 0 to disable the - // early termination at that speed. 
- if (!x->e_mbd.lossless[xd->mi[0]->segment_id] && - (ctx_none->skippable && best_rdc.dist < dist_breakout_thr && - best_rdc.rate < rate_breakout_thr)) { - do_square_split = 0; - } - } - } - - restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); - } - - // store estimated motion vector - if (cpi->sf.adaptive_motion_search) store_pred_mv(x, ctx_none); - - int64_t temp_best_rdcost = best_rdc.rdcost; - pn_rdc = best_rdc; - - // PARTITION_SPLIT - if (do_square_split) { - int reached_last_index = 0; - subsize = get_partition_subsize(bsize, PARTITION_SPLIT); - int idx; - - for (idx = 0; idx < 4 && sum_rdc.rdcost < temp_best_rdcost; ++idx) { - const int x_idx = (idx & 1) * mi_step; - const int y_idx = (idx >> 1) * mi_step; - - if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) - continue; - - if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none); - - pc_tree->split[idx]->index = idx; - int64_t *p_split_rd = &split_rd[idx]; - // TODO(Cherma) : Account for partition cost while passing best rd to - // rd_pick_sqr_partition() - rd_pick_sqr_partition(cpi, td, tile_data, tp, mi_row + y_idx, - mi_col + x_idx, subsize, &this_rdc, - temp_best_rdcost - sum_rdc.rdcost, - pc_tree->split[idx], p_split_rd); - - pc_tree->pc_tree_stats.sub_block_rdcost[idx] = this_rdc.rdcost; - pc_tree->pc_tree_stats.sub_block_skip[idx] = - pc_tree->split[idx]->none.skip; - - if (this_rdc.rate == INT_MAX) { - sum_rdc.rdcost = INT64_MAX; - break; - } else { - sum_rdc.rate += this_rdc.rate; - sum_rdc.dist += this_rdc.dist; - sum_rdc.rdcost += this_rdc.rdcost; - } - } - reached_last_index = (idx == 4); - - if (reached_last_index && sum_rdc.rdcost < best_rdc.rdcost) { - sum_rdc.rate += partition_cost[PARTITION_SPLIT]; - sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); - - if (sum_rdc.rdcost < best_rdc.rdcost) { - best_rdc = sum_rdc; - pc_tree->partitioning = PARTITION_SPLIT; - } - } - - int has_split = 0; - if (pc_tree->partitioning == PARTITION_SPLIT) { - 
for (int cb_idx = 0; cb_idx <= AOMMIN(idx, 3); ++cb_idx) { - if (pc_tree->split[cb_idx]->partitioning == PARTITION_SPLIT) - ++has_split; - } - - if (has_split >= 3 || sum_rdc.rdcost < (pn_rdc.rdcost >> 1)) { - pc_tree->cb_search_range = SPLIT_PLANE; - } - } - - if (pc_tree->partitioning == PARTITION_NONE) { - pc_tree->cb_search_range = SEARCH_SAME_PLANE; - if (pn_rdc.dist <= sum_rdc.dist) - pc_tree->cb_search_range = NONE_PARTITION_PLANE; - } - - if (pn_rdc.rate == INT_MAX) pc_tree->cb_search_range = NONE_PARTITION_PLANE; - - restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); - } // if (do_split) - - pc_tree->pc_tree_stats.split = pc_tree->partitioning == PARTITION_SPLIT; - if (do_square_split) { - for (int i = 0; i < 4; ++i) { - pc_tree->pc_tree_stats.sub_block_split[i] = - pc_tree->split[i]->partitioning == PARTITION_SPLIT; - } - } - - // TODO(jbb): This code added so that we avoid static analysis - // warning related to the fact that best_rd isn't used after this - // point. This code should be refactored so that the duplicate - // checks occur in some sub function and thus are used... 
- (void)best_rd; - *rd_cost = best_rdc; - - if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX && - pc_tree->index != 3) { - if (bsize == cm->seq_params.sb_size) { - restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); - } else { - encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize, - pc_tree, NULL); - } - } - - if (bsize == cm->seq_params.sb_size) { - assert(best_rdc.rate < INT_MAX); - assert(best_rdc.dist < INT64_MAX); - } else { - assert(tp_orig == *tp); - } -} - -#define FEATURE_SIZE 19 -static const float two_pass_split_partition_weights_128[FEATURE_SIZE + 1] = { - 2.683936f, -0.193620f, -4.106470f, -0.141320f, -0.282289f, - 0.125296f, -1.134961f, 0.862757f, -0.418799f, -0.637666f, - 0.016232f, 0.345013f, 0.018823f, -0.393394f, -1.130700f, - 0.695357f, 0.112569f, -0.341975f, -0.513882f, 5.7488966f, -}; - -static const float two_pass_split_partition_weights_64[FEATURE_SIZE + 1] = { - 2.990993f, 0.423273f, -0.926544f, 0.454646f, -0.292698f, - -1.311632f, -0.284432f, 0.717141f, -0.419257f, -0.574760f, - -0.674444f, 0.669047f, -0.374255f, 0.380624f, -0.804036f, - 0.264021f, 0.004163f, 1.896802f, 0.924287f, 0.13490619f, -}; - -static const float two_pass_split_partition_weights_32[FEATURE_SIZE + 1] = { - 2.795181f, -0.136943f, -0.924842f, 0.405330f, -0.463505f, - -0.584076f, -0.831472f, 0.382985f, -0.597544f, -0.138915f, - -1.354350f, 0.466035f, -0.553961f, 0.213202f, -1.166429f, - 0.010776f, -0.096236f, 2.335084f, 1.699857f, -0.58178353f, -}; - -static const float two_pass_split_partition_weights_16[FEATURE_SIZE + 1] = { - 1.987888f, -0.431100f, -1.687703f, 0.262602f, -0.425298f, - -0.463870f, -1.493457f, 0.470917f, -0.528457f, -0.087700f, - -1.815092f, 0.152883f, -0.337908f, 0.093679f, -1.548267f, - -0.042387f, -0.000861f, 2.556746f, 1.619192f, 0.03643292f, -}; - -static const float two_pass_split_partition_weights_8[FEATURE_SIZE + 1] = { - 2.188344f, -0.817528f, -2.119219f, 0.000000f, -0.348167f, - -0.658074f, -1.960362f, 
0.000000f, -0.403080f, 0.282699f, - -2.061088f, 0.000000f, -0.431919f, -0.127960f, -1.099550f, - 0.000000f, 0.121622f, 2.017455f, 2.058228f, -0.15475988f, -}; - -static const float two_pass_none_partition_weights_128[FEATURE_SIZE + 1] = { - -1.006689f, 0.777908f, 4.461072f, -0.395782f, -0.014610f, - -0.853863f, 0.729997f, -0.420477f, 0.282429f, -1.194595f, - 3.181220f, -0.511416f, 0.117084f, -1.149348f, 1.507990f, - -0.477212f, 0.202963f, -1.469581f, 0.624461f, -0.89081228f, -}; - -static const float two_pass_none_partition_weights_64[FEATURE_SIZE + 1] = { - -1.241117f, 0.844878f, 5.638803f, -0.489780f, -0.108796f, - -4.576821f, 1.540624f, -0.477519f, 0.227791f, -1.443968f, - 1.586911f, -0.505125f, 0.140764f, -0.464194f, 1.466658f, - -0.641166f, 0.195412f, 1.427905f, 2.080007f, -1.98272777f, -}; - -static const float two_pass_none_partition_weights_32[FEATURE_SIZE + 1] = { - -2.130825f, 0.476023f, 5.907343f, -0.516002f, -0.097471f, - -2.662754f, 0.614858f, -0.576728f, 0.085261f, -0.031901f, - 0.727842f, -0.600034f, 0.079326f, 0.324328f, 0.504502f, - -0.547105f, -0.037670f, 0.304995f, 0.369018f, -2.66299987f, -}; - -static const float two_pass_none_partition_weights_16[FEATURE_SIZE + 1] = { - -1.626410f, 0.872047f, 5.414965f, -0.554781f, -0.084514f, - -3.020550f, 0.467632f, -0.382280f, 0.199568f, 0.426220f, - 0.829426f, -0.467100f, 0.153098f, 0.662994f, 0.327545f, - -0.560106f, -0.141610f, 0.403372f, 0.523991f, -3.02891231f, -}; - -static const float two_pass_none_partition_weights_8[FEATURE_SIZE + 1] = { - -1.463349f, 0.375376f, 4.751430f, 0.000000f, -0.184451f, - -1.655447f, 0.443214f, 0.000000f, 0.127961f, 0.152435f, - 0.083288f, 0.000000f, 0.143105f, 0.438012f, 0.073238f, - 0.000000f, -0.278137f, 0.186134f, 0.073737f, -1.6494962f, -}; - -// split_score indicates confidence of picking split partition; -// none_score indicates confidence of picking none partition; -static int ml_prune_2pass_split_partition(const PC_TREE_STATS *pc_tree_stats, - BLOCK_SIZE bsize, 
int *split_score, - int *none_score) { - if (!pc_tree_stats->valid) return 0; - const float *split_weights = NULL; - const float *none_weights = NULL; - switch (bsize) { - case BLOCK_4X4: break; - case BLOCK_8X8: - split_weights = two_pass_split_partition_weights_8; - none_weights = two_pass_none_partition_weights_8; - break; - case BLOCK_16X16: - split_weights = two_pass_split_partition_weights_16; - none_weights = two_pass_none_partition_weights_16; - break; - case BLOCK_32X32: - split_weights = two_pass_split_partition_weights_32; - none_weights = two_pass_none_partition_weights_32; - break; - case BLOCK_64X64: - split_weights = two_pass_split_partition_weights_64; - none_weights = two_pass_none_partition_weights_64; - break; - case BLOCK_128X128: - split_weights = two_pass_split_partition_weights_128; - none_weights = two_pass_none_partition_weights_128; - break; - default: assert(0 && "Unexpected bsize."); - } - if (!split_weights || !none_weights) return 0; - - aom_clear_system_state(); - - float features[FEATURE_SIZE]; - int feature_index = 0; - features[feature_index++] = (float)pc_tree_stats->split; - features[feature_index++] = (float)pc_tree_stats->skip; - const int rdcost = (int)AOMMIN(INT_MAX, pc_tree_stats->rdcost); - const int rd_valid = rdcost > 0 && rdcost < 1000000000; - features[feature_index++] = (float)rd_valid; - for (int i = 0; i < 4; ++i) { - features[feature_index++] = (float)pc_tree_stats->sub_block_split[i]; - features[feature_index++] = (float)pc_tree_stats->sub_block_skip[i]; - const int sub_rdcost = - (int)AOMMIN(INT_MAX, pc_tree_stats->sub_block_rdcost[i]); - const int sub_rd_valid = sub_rdcost > 0 && sub_rdcost < 1000000000; - features[feature_index++] = (float)sub_rd_valid; - // Ratio between the sub-block RD and the whole-block RD. 
- float rd_ratio = 1.0f; - if (rd_valid && sub_rd_valid && sub_rdcost < rdcost) - rd_ratio = (float)sub_rdcost / (float)rdcost; - features[feature_index++] = rd_ratio; - } - assert(feature_index == FEATURE_SIZE); - - float score_1 = split_weights[FEATURE_SIZE]; - float score_2 = none_weights[FEATURE_SIZE]; - for (int i = 0; i < FEATURE_SIZE; ++i) { - score_1 += features[i] * split_weights[i]; - score_2 += features[i] * none_weights[i]; - } - *split_score = (int)(score_1 * 100); - *none_score = (int)(score_2 * 100); - return 1; -} -#undef FEATURE_SIZE - -static void ml_prune_rect_partition(const AV1_COMP *const cpi, - const MACROBLOCK *const x, BLOCK_SIZE bsize, - int64_t best_rd, int64_t none_rd, - int64_t *split_rd, - int *const dst_prune_horz, - int *const dst_prune_vert) { - if (bsize < BLOCK_8X8 || best_rd >= 1000000000) return; - best_rd = AOMMAX(best_rd, 1); - const NN_CONFIG *nn_config = NULL; - const float prob_thresholds[5] = { 0.01f, 0.01f, 0.004f, 0.002f, 0.002f }; - float cur_thresh = 0.0f; - switch (bsize) { - case BLOCK_8X8: - nn_config = &av1_rect_partition_nnconfig_8; - cur_thresh = prob_thresholds[0]; - break; - case BLOCK_16X16: - nn_config = &av1_rect_partition_nnconfig_16; - cur_thresh = prob_thresholds[1]; - break; - case BLOCK_32X32: - nn_config = &av1_rect_partition_nnconfig_32; - cur_thresh = prob_thresholds[2]; - break; - case BLOCK_64X64: - nn_config = &av1_rect_partition_nnconfig_64; - cur_thresh = prob_thresholds[3]; - break; - case BLOCK_128X128: - nn_config = &av1_rect_partition_nnconfig_128; - cur_thresh = prob_thresholds[4]; - break; - default: assert(0 && "Unexpected bsize."); - } - if (!nn_config) return; - aom_clear_system_state(); - - // 1. 
Compute input features - float features[9]; - - // RD cost ratios - for (int i = 0; i < 5; i++) features[i] = 1.0f; - if (none_rd > 0 && none_rd < 1000000000) - features[0] = (float)none_rd / (float)best_rd; - for (int i = 0; i < 4; i++) { - if (split_rd[i] > 0 && split_rd[i] < 1000000000) - features[1 + i] = (float)split_rd[i] / (float)best_rd; - } - - // Variance ratios - const MACROBLOCKD *const xd = &x->e_mbd; - int whole_block_variance; - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - whole_block_variance = av1_high_get_sby_perpixel_variance( - cpi, &x->plane[0].src, bsize, xd->bd); - } else { - whole_block_variance = - av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); - } - whole_block_variance = AOMMAX(whole_block_variance, 1); - - int split_variance[4]; - const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT); - struct buf_2d buf; - buf.stride = x->plane[0].src.stride; - const int bw = block_size_wide[bsize]; - for (int i = 0; i < 4; ++i) { - const int x_idx = (i & 1) * bw / 2; - const int y_idx = (i >> 1) * bw / 2; - buf.buf = x->plane[0].src.buf + x_idx + y_idx * buf.stride; - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - split_variance[i] = - av1_high_get_sby_perpixel_variance(cpi, &buf, subsize, xd->bd); - } else { - split_variance[i] = av1_get_sby_perpixel_variance(cpi, &buf, subsize); - } - } - - for (int i = 0; i < 4; i++) - features[5 + i] = (float)split_variance[i] / (float)whole_block_variance; - - // 2. 
Do the prediction and prune 0-2 partitions based on their probabilities - float raw_scores[3] = { 0.0f }; - av1_nn_predict(features, nn_config, raw_scores); - float probs[3] = { 0.0f }; - av1_nn_softmax(raw_scores, probs, 3); - - // probs[0] is the probability of the fact that both rectangular partitions - // are worse than current best_rd - if (probs[1] <= cur_thresh) (*dst_prune_horz) = 1; - if (probs[2] <= cur_thresh) (*dst_prune_vert) = 1; -} - -// Use a ML model to predict if horz_a, horz_b, vert_a, and vert_b should be -// considered. -static void ml_prune_ab_partition(BLOCK_SIZE bsize, int part_ctx, int var_ctx, - int64_t best_rd, int64_t horz_rd[2], - int64_t vert_rd[2], int64_t split_rd[4], - int *const horza_partition_allowed, - int *const horzb_partition_allowed, - int *const verta_partition_allowed, - int *const vertb_partition_allowed) { - if (bsize < BLOCK_8X8 || best_rd >= 1000000000) return; - const NN_CONFIG *nn_config = NULL; - switch (bsize) { - case BLOCK_8X8: nn_config = NULL; break; - case BLOCK_16X16: nn_config = &av1_ab_partition_nnconfig_16; break; - case BLOCK_32X32: nn_config = &av1_ab_partition_nnconfig_32; break; - case BLOCK_64X64: nn_config = &av1_ab_partition_nnconfig_64; break; - case BLOCK_128X128: nn_config = &av1_ab_partition_nnconfig_128; break; - default: assert(0 && "Unexpected bsize."); - } - if (!nn_config) return; - - aom_clear_system_state(); - - // Generate features. 
- float features[10]; - int feature_index = 0; - features[feature_index++] = (float)part_ctx; - features[feature_index++] = (float)var_ctx; - const int rdcost = (int)AOMMIN(INT_MAX, best_rd); - int sub_block_rdcost[8] = { 0 }; - int rd_index = 0; - for (int i = 0; i < 2; ++i) { - if (horz_rd[i] > 0 && horz_rd[i] < 1000000000) - sub_block_rdcost[rd_index] = (int)horz_rd[i]; - ++rd_index; - } - for (int i = 0; i < 2; ++i) { - if (vert_rd[i] > 0 && vert_rd[i] < 1000000000) - sub_block_rdcost[rd_index] = (int)vert_rd[i]; - ++rd_index; - } - for (int i = 0; i < 4; ++i) { - if (split_rd[i] > 0 && split_rd[i] < 1000000000) - sub_block_rdcost[rd_index] = (int)split_rd[i]; - ++rd_index; - } - for (int i = 0; i < 8; ++i) { - // Ratio between the sub-block RD and the whole-block RD. - float rd_ratio = 1.0f; - if (sub_block_rdcost[i] > 0 && sub_block_rdcost[i] < rdcost) - rd_ratio = (float)sub_block_rdcost[i] / (float)rdcost; - features[feature_index++] = rd_ratio; - } - assert(feature_index == 10); - - // Calculate scores using the NN model. - float score[16] = { 0.0f }; - av1_nn_predict(features, nn_config, score); - int int_score[16]; - int max_score = -1000; - for (int i = 0; i < 16; ++i) { - int_score[i] = (int)(100 * score[i]); - max_score = AOMMAX(int_score[i], max_score); - } - - // Make decisions based on the model scores. 
- int thresh = max_score; - switch (bsize) { - case BLOCK_16X16: thresh -= 150; break; - case BLOCK_32X32: thresh -= 100; break; - default: break; - } - *horza_partition_allowed = 0; - *horzb_partition_allowed = 0; - *verta_partition_allowed = 0; - *vertb_partition_allowed = 0; - for (int i = 0; i < 16; ++i) { - if (int_score[i] >= thresh) { - if ((i >> 0) & 1) *horza_partition_allowed = 1; - if ((i >> 1) & 1) *horzb_partition_allowed = 1; - if ((i >> 2) & 1) *verta_partition_allowed = 1; - if ((i >> 3) & 1) *vertb_partition_allowed = 1; - } - } -} - -#define FEATURES 18 -#define LABELS 4 -// Use a ML model to predict if horz4 and vert4 should be considered. -static void ml_prune_4_partition(const AV1_COMP *const cpi, MACROBLOCK *const x, - BLOCK_SIZE bsize, int part_ctx, - int64_t best_rd, int64_t horz_rd[2], - int64_t vert_rd[2], int64_t split_rd[4], - int *const partition_horz4_allowed, - int *const partition_vert4_allowed, - unsigned int pb_source_variance, int mi_row, - int mi_col) { - if (best_rd >= 1000000000) return; - const NN_CONFIG *nn_config = NULL; - switch (bsize) { - case BLOCK_16X16: nn_config = &av1_4_partition_nnconfig_16; break; - case BLOCK_32X32: nn_config = &av1_4_partition_nnconfig_32; break; - case BLOCK_64X64: nn_config = &av1_4_partition_nnconfig_64; break; - default: assert(0 && "Unexpected bsize."); - } - if (!nn_config) return; - - aom_clear_system_state(); - - // Generate features. 
- float features[FEATURES]; - int feature_index = 0; - features[feature_index++] = (float)part_ctx; - features[feature_index++] = (float)get_unsigned_bits(pb_source_variance); - - const int rdcost = (int)AOMMIN(INT_MAX, best_rd); - int sub_block_rdcost[8] = { 0 }; - int rd_index = 0; - for (int i = 0; i < 2; ++i) { - if (horz_rd[i] > 0 && horz_rd[i] < 1000000000) - sub_block_rdcost[rd_index] = (int)horz_rd[i]; - ++rd_index; - } - for (int i = 0; i < 2; ++i) { - if (vert_rd[i] > 0 && vert_rd[i] < 1000000000) - sub_block_rdcost[rd_index] = (int)vert_rd[i]; - ++rd_index; - } - for (int i = 0; i < 4; ++i) { - if (split_rd[i] > 0 && split_rd[i] < 1000000000) - sub_block_rdcost[rd_index] = (int)split_rd[i]; - ++rd_index; - } - for (int i = 0; i < 8; ++i) { - // Ratio between the sub-block RD and the whole-block RD. - float rd_ratio = 1.0f; - if (sub_block_rdcost[i] > 0 && sub_block_rdcost[i] < rdcost) - rd_ratio = (float)sub_block_rdcost[i] / (float)rdcost; - features[feature_index++] = rd_ratio; - } - - // Get variance of the 1:4 and 4:1 sub-blocks. 
- unsigned int horz_4_source_var[4] = { 0 }; - unsigned int vert_4_source_var[4] = { 0 }; - { - BLOCK_SIZE horz_4_bs = get_partition_subsize(bsize, PARTITION_HORZ_4); - BLOCK_SIZE vert_4_bs = get_partition_subsize(bsize, PARTITION_VERT_4); - av1_setup_src_planes(x, cpi->source, mi_row, mi_col, - av1_num_planes(&cpi->common)); - const int src_stride = x->plane[0].src.stride; - const uint8_t *src = x->plane[0].src.buf; - const MACROBLOCKD *const xd = &x->e_mbd; - for (int i = 0; i < 4; ++i) { - const uint8_t *horz_src = - src + i * block_size_high[horz_4_bs] * src_stride; - const uint8_t *vert_src = src + i * block_size_wide[vert_4_bs]; - unsigned int horz_var, vert_var, sse; - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - switch (xd->bd) { - case 10: - horz_var = cpi->fn_ptr[horz_4_bs].vf( - horz_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_10), - 0, &sse); - vert_var = cpi->fn_ptr[vert_4_bs].vf( - vert_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_10), - 0, &sse); - break; - case 12: - horz_var = cpi->fn_ptr[horz_4_bs].vf( - horz_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_12), - 0, &sse); - vert_var = cpi->fn_ptr[vert_4_bs].vf( - vert_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_12), - 0, &sse); - break; - case 8: - default: - horz_var = cpi->fn_ptr[horz_4_bs].vf( - horz_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_8), - 0, &sse); - vert_var = cpi->fn_ptr[vert_4_bs].vf( - vert_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_8), - 0, &sse); - break; - } - horz_4_source_var[i] = - ROUND_POWER_OF_TWO(horz_var, num_pels_log2_lookup[horz_4_bs]); - vert_4_source_var[i] = - ROUND_POWER_OF_TWO(vert_var, num_pels_log2_lookup[vert_4_bs]); - } else { - horz_var = cpi->fn_ptr[horz_4_bs].vf(horz_src, src_stride, AV1_VAR_OFFS, - 0, &sse); - vert_var = cpi->fn_ptr[vert_4_bs].vf(vert_src, src_stride, AV1_VAR_OFFS, - 0, &sse); - horz_4_source_var[i] = - ROUND_POWER_OF_TWO(horz_var, num_pels_log2_lookup[horz_4_bs]); - 
vert_4_source_var[i] = - ROUND_POWER_OF_TWO(vert_var, num_pels_log2_lookup[vert_4_bs]); - } - } - } - - const float denom = (float)(pb_source_variance + 1); - const float low_b = 0.1f; - const float high_b = 10.0f; - for (int i = 0; i < 4; ++i) { - // Ratio between the 4:1 sub-block variance and the whole-block variance. - float var_ratio = (float)(horz_4_source_var[i] + 1) / denom; - if (var_ratio < low_b) var_ratio = low_b; - if (var_ratio > high_b) var_ratio = high_b; - features[feature_index++] = var_ratio; - } - for (int i = 0; i < 4; ++i) { - // Ratio between the 1:4 sub-block RD and the whole-block RD. - float var_ratio = (float)(vert_4_source_var[i] + 1) / denom; - if (var_ratio < low_b) var_ratio = low_b; - if (var_ratio > high_b) var_ratio = high_b; - features[feature_index++] = var_ratio; - } - assert(feature_index == FEATURES); - - // Calculate scores using the NN model. - float score[LABELS] = { 0.0f }; - av1_nn_predict(features, nn_config, score); - int int_score[LABELS]; - int max_score = -1000; - for (int i = 0; i < LABELS; ++i) { - int_score[i] = (int)(100 * score[i]); - max_score = AOMMAX(int_score[i], max_score); - } - - // Make decisions based on the model scores. - int thresh = max_score; - switch (bsize) { - case BLOCK_16X16: thresh -= 500; break; - case BLOCK_32X32: thresh -= 500; break; - case BLOCK_64X64: thresh -= 200; break; - default: break; - } - *partition_horz4_allowed = 0; - *partition_vert4_allowed = 0; - for (int i = 0; i < LABELS; ++i) { - if (int_score[i] >= thresh) { - if ((i >> 0) & 1) *partition_horz4_allowed = 1; - if ((i >> 1) & 1) *partition_vert4_allowed = 1; - } - } -} -#undef FEATURES -#undef LABELS - -#define FEATURES 4 -// ML-based partition search breakout. 
-static int ml_predict_breakout(const AV1_COMP *const cpi, BLOCK_SIZE bsize, - const MACROBLOCK *const x, - const RD_STATS *const rd_stats, - unsigned int pb_source_variance) { - const NN_CONFIG *nn_config = NULL; - int thresh = 0; - switch (bsize) { - case BLOCK_8X8: - nn_config = &av1_partition_breakout_nnconfig_8; - thresh = cpi->sf.ml_partition_search_breakout_thresh[0]; - break; - case BLOCK_16X16: - nn_config = &av1_partition_breakout_nnconfig_16; - thresh = cpi->sf.ml_partition_search_breakout_thresh[1]; - break; - case BLOCK_32X32: - nn_config = &av1_partition_breakout_nnconfig_32; - thresh = cpi->sf.ml_partition_search_breakout_thresh[2]; - break; - case BLOCK_64X64: - nn_config = &av1_partition_breakout_nnconfig_64; - thresh = cpi->sf.ml_partition_search_breakout_thresh[3]; - break; - case BLOCK_128X128: - nn_config = &av1_partition_breakout_nnconfig_128; - thresh = cpi->sf.ml_partition_search_breakout_thresh[4]; - break; - default: assert(0 && "Unexpected bsize."); - } - if (!nn_config || thresh < 0) return 0; - - // Generate feature values. - float features[FEATURES]; - int feature_index = 0; - aom_clear_system_state(); - - const int num_pels_log2 = num_pels_log2_lookup[bsize]; - float rate_f = (float)AOMMIN(rd_stats->rate, INT_MAX); - rate_f = ((float)x->rdmult / 128.0f / 512.0f / (float)(1 << num_pels_log2)) * - rate_f; - features[feature_index++] = rate_f; - - const float dist_f = - (float)(AOMMIN(rd_stats->dist, INT_MAX) >> num_pels_log2); - features[feature_index++] = dist_f; - - features[feature_index++] = (float)pb_source_variance; - - const int dc_q = (int)x->plane[0].dequant_QTX[0]; - features[feature_index++] = (float)(dc_q * dc_q) / 256.0f; - assert(feature_index == FEATURES); - - // Calculate score using the NN model. - float score = 0.0f; - av1_nn_predict(features, nn_config, &score); - - // Make decision. 
- return (int)(score * 100) >= thresh; -} -#undef FEATURES - -// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are -// unlikely to be selected depending on previous rate-distortion optimization -// results, for encoding speed-up. -static void rd_pick_partition(AV1_COMP *const cpi, ThreadData *td, - TileDataEnc *tile_data, TOKENEXTRA **tp, - int mi_row, int mi_col, BLOCK_SIZE bsize, - RD_STATS *rd_cost, int64_t best_rd, - PC_TREE *pc_tree, int64_t *none_rd) { - const AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - TileInfo *const tile_info = &tile_data->tile_info; - MACROBLOCK *const x = &td->mb; - MACROBLOCKD *const xd = &x->e_mbd; - const int mi_step = mi_size_wide[bsize] / 2; - RD_SEARCH_MACROBLOCK_CONTEXT x_ctx; - const TOKENEXTRA *const tp_orig = *tp; - PICK_MODE_CONTEXT *ctx_none = &pc_tree->none; - int tmp_partition_cost[PARTITION_TYPES]; - BLOCK_SIZE subsize; - RD_STATS this_rdc, sum_rdc, best_rdc; - const int bsize_at_least_8x8 = (bsize >= BLOCK_8X8); - int do_square_split = bsize_at_least_8x8; - const int pl = bsize_at_least_8x8 - ? partition_plane_context(xd, mi_row, mi_col, bsize) - : 0; - const int *partition_cost = - pl >= 0 ? 
x->partition_cost[pl] : x->partition_cost[0]; - - int do_rectangular_split = 1; - int64_t cur_none_rd = 0; - int64_t split_rd[4] = { 0, 0, 0, 0 }; - int64_t horz_rd[2] = { 0, 0 }; - int64_t vert_rd[2] = { 0, 0 }; - - int split_ctx_is_ready[2] = { 0, 0 }; - int horz_ctx_is_ready = 0; - int vert_ctx_is_ready = 0; - BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT); - - if (best_rd < 0) { - pc_tree->none.rdcost = INT64_MAX; - pc_tree->none.skip = 0; - av1_invalid_rd_stats(rd_cost); - return; - } - if (bsize == cm->seq_params.sb_size) x->must_find_valid_partition = 0; - - // Override skipping rectangular partition operations for edge blocks - const int has_rows = (mi_row + mi_step < cm->mi_rows); - const int has_cols = (mi_col + mi_step < cm->mi_cols); - const int xss = x->e_mbd.plane[1].subsampling_x; - const int yss = x->e_mbd.plane[1].subsampling_y; - - BLOCK_SIZE min_size = x->min_partition_size; - BLOCK_SIZE max_size = x->max_partition_size; - - if (none_rd) *none_rd = 0; - -#if CONFIG_FP_MB_STATS - unsigned int src_diff_var = UINT_MAX; - int none_complexity = 0; -#endif - - int partition_none_allowed = has_rows && has_cols; - int partition_horz_allowed = has_cols && yss <= xss && bsize_at_least_8x8; - int partition_vert_allowed = has_rows && xss <= yss && bsize_at_least_8x8; - - (void)*tp_orig; - - // Override partition costs at the edges of the frame in the same - // way as in read_partition (see decodeframe.c) - if (!(has_rows && has_cols)) { - assert(bsize_at_least_8x8 && pl >= 0); - const aom_cdf_prob *partition_cdf = cm->fc->partition_cdf[pl]; - for (int i = 0; i < PARTITION_TYPES; ++i) tmp_partition_cost[i] = INT_MAX; - if (has_cols) { - // At the bottom, the two possibilities are HORZ and SPLIT - aom_cdf_prob bot_cdf[2]; - partition_gather_vert_alike(bot_cdf, partition_cdf, bsize); - static const int bot_inv_map[2] = { PARTITION_HORZ, PARTITION_SPLIT }; - av1_cost_tokens_from_cdf(tmp_partition_cost, bot_cdf, bot_inv_map); - } else if 
(has_rows) { - // At the right, the two possibilities are VERT and SPLIT - aom_cdf_prob rhs_cdf[2]; - partition_gather_horz_alike(rhs_cdf, partition_cdf, bsize); - static const int rhs_inv_map[2] = { PARTITION_VERT, PARTITION_SPLIT }; - av1_cost_tokens_from_cdf(tmp_partition_cost, rhs_cdf, rhs_inv_map); - } else { - // At the bottom right, we always split - tmp_partition_cost[PARTITION_SPLIT] = 0; - } - - partition_cost = tmp_partition_cost; - } - -#ifndef NDEBUG - // Nothing should rely on the default value of this array (which is just - // leftover from encoding the previous block. Setting it to fixed pattern - // when debugging. - // bit 0, 1, 2 are blk_skip of each plane - // bit 4, 5, 6 are initialization checking of each plane - memset(x->blk_skip, 0x77, sizeof(x->blk_skip)); -#endif // NDEBUG - - assert(mi_size_wide[bsize] == mi_size_high[bsize]); - - av1_init_rd_stats(&this_rdc); - av1_invalid_rd_stats(&best_rdc); - best_rdc.rdcost = best_rd; - - set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); - - if (bsize == BLOCK_16X16 && cpi->vaq_refresh) - x->mb_energy = av1_log_block_var(cpi, x, bsize); - - if (cpi->sf.cb_partition_search && bsize == BLOCK_16X16) { - const int cb_partition_search_ctrl = - ((pc_tree->index == 0 || pc_tree->index == 3) + - get_chessboard_index(cm->current_video_frame)) & - 0x1; - - if (cb_partition_search_ctrl && bsize > min_size && bsize < max_size) - set_partition_range(cm, xd, mi_row, mi_col, bsize, &min_size, &max_size); - } - - // Determine partition types in search according to the speed features. - // The threshold set here has to be of square block size. - if (cpi->sf.auto_min_max_partition_size) { - const int no_partition_allowed = (bsize <= max_size && bsize >= min_size); - // Note: Further partitioning is NOT allowed when bsize == min_size already. 
- const int partition_allowed = (bsize <= max_size && bsize > min_size); - partition_none_allowed &= no_partition_allowed; - partition_horz_allowed &= partition_allowed || !has_rows; - partition_vert_allowed &= partition_allowed || !has_cols; - do_square_split &= bsize > min_size; - } - - if (bsize > cpi->sf.use_square_partition_only_threshold) { - partition_horz_allowed &= !has_rows; - partition_vert_allowed &= !has_cols; - } - - if (bsize > BLOCK_4X4 && x->use_cb_search_range && - cpi->sf.auto_min_max_partition_size == 0) { - int split_score = 0; - int none_score = 0; - const int score_valid = ml_prune_2pass_split_partition( - &pc_tree->pc_tree_stats, bsize, &split_score, &none_score); - if (score_valid) { - { - const int only_split_thresh = 300; - const int no_none_thresh = 250; - const int no_split_thresh = 0; - if (split_score > only_split_thresh) { - partition_none_allowed = 0; - partition_horz_allowed = 0; - partition_vert_allowed = 0; - } else if (split_score > no_none_thresh) { - partition_none_allowed = 0; - } - if (split_score < no_split_thresh) do_square_split = 0; - } - { - const int no_split_thresh = 120; - const int no_none_thresh = -120; - if (none_score > no_split_thresh && partition_none_allowed) - do_square_split = 0; - if (none_score < no_none_thresh) partition_none_allowed = 0; - } - } else { - if (pc_tree->cb_search_range == SPLIT_PLANE) { - partition_none_allowed = 0; - partition_horz_allowed = 0; - partition_vert_allowed = 0; - } - if (pc_tree->cb_search_range == SEARCH_SAME_PLANE) do_square_split = 0; - if (pc_tree->cb_search_range == NONE_PARTITION_PLANE) { - do_square_split = 0; - partition_horz_allowed = 0; - partition_vert_allowed = 0; - } - } - - // Fall back to default values in case all partition modes are rejected. 
- if (partition_none_allowed == 0 && do_square_split == 0 && - partition_horz_allowed == 0 && partition_vert_allowed == 0) { - do_square_split = bsize_at_least_8x8; - partition_none_allowed = has_rows && has_cols; - partition_horz_allowed = has_cols && yss <= xss && bsize_at_least_8x8; - partition_vert_allowed = has_rows && xss <= yss && bsize_at_least_8x8; - } - } - - xd->above_txfm_context = cm->above_txfm_context[tile_info->tile_row] + mi_col; - xd->left_txfm_context = - xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); - save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); - -#if CONFIG_FP_MB_STATS - if (cpi->use_fp_mb_stats) { - set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); - src_diff_var = get_sby_perpixel_diff_variance(cpi, &x->plane[0].src, mi_row, - mi_col, bsize); - } - - // Decide whether we shall split directly and skip searching NONE by using - // the first pass block statistics - if (cpi->use_fp_mb_stats && bsize >= BLOCK_32X32 && do_square_split && - partition_none_allowed && src_diff_var > 4 && - cm->base_qindex < qindex_split_threshold_lookup[bsize]) { - int mb_row = mi_row >> 1; - int mb_col = mi_col >> 1; - int mb_row_end = - AOMMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows); - int mb_col_end = - AOMMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols); - int r, c; - - // compute a complexity measure, basically measure inconsistency of motion - // vectors obtained from the first pass in the current block - for (r = mb_row; r < mb_row_end; r++) { - for (c = mb_col; c < mb_col_end; c++) { - const int mb_index = r * cm->mb_cols + c; - - MOTION_DIRECTION this_mv; - MOTION_DIRECTION right_mv; - MOTION_DIRECTION bottom_mv; - - this_mv = - get_motion_direction_fp(cpi->twopass.this_frame_mb_stats[mb_index]); - - // to its right - if (c != mb_col_end - 1) { - right_mv = get_motion_direction_fp( - cpi->twopass.this_frame_mb_stats[mb_index + 1]); - none_complexity += get_motion_inconsistency(this_mv, right_mv); 
- } - - // to its bottom - if (r != mb_row_end - 1) { - bottom_mv = get_motion_direction_fp( - cpi->twopass.this_frame_mb_stats[mb_index + cm->mb_cols]); - none_complexity += get_motion_inconsistency(this_mv, bottom_mv); - } - - // do not count its left and top neighbors to avoid double counting - } - } - - if (none_complexity > complexity_16x16_blocks_threshold[bsize]) { - partition_none_allowed = 0; - } - } -#endif - - // Ref frames picked in the [i_th] quarter subblock during square partition - // RD search. It may be used to prune ref frame selection of rect partitions. - int ref_frames_used[4] = { - 0, - }; - -BEGIN_PARTITION_SEARCH: - if (x->must_find_valid_partition) { - partition_none_allowed = has_rows && has_cols; - partition_horz_allowed = has_cols && yss <= xss && bsize_at_least_8x8; - partition_vert_allowed = has_rows && xss <= yss && bsize_at_least_8x8; - } - - // Partition block source pixel variance. - unsigned int pb_source_variance = UINT_MAX; - -#if CONFIG_DIST_8X8 - if (x->using_dist_8x8) { - if (block_size_high[bsize] <= 8) partition_horz_allowed = 0; - if (block_size_wide[bsize] <= 8) partition_vert_allowed = 0; - if (block_size_high[bsize] <= 8 || block_size_wide[bsize] <= 8) - do_square_split = 0; - } -#endif - - // PARTITION_NONE - if (partition_none_allowed) { - int pt_cost = 0; - if (bsize_at_least_8x8) { - pt_cost = partition_cost[PARTITION_NONE] < INT_MAX - ? partition_cost[PARTITION_NONE] - : 0; - } - int64_t partition_rd_cost = RDCOST(x->rdmult, pt_cost, 0); - int64_t best_remain_rdcost = (best_rdc.rdcost == INT64_MAX) - ? 
INT64_MAX - : (best_rdc.rdcost - partition_rd_cost); - rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, - PARTITION_NONE, bsize, ctx_none, best_remain_rdcost); - pb_source_variance = x->source_variance; - if (none_rd) *none_rd = this_rdc.rdcost; - cur_none_rd = this_rdc.rdcost; - if (this_rdc.rate != INT_MAX) { - if (cpi->sf.prune_ref_frame_for_rect_partitions) { - const int ref_type = av1_ref_frame_type(ctx_none->mic.ref_frame); - for (int i = 0; i < 4; ++i) { - ref_frames_used[i] |= (1 << ref_type); - } - } - if (bsize_at_least_8x8) { - this_rdc.rate += pt_cost; - this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist); - } - - if (this_rdc.rdcost < best_rdc.rdcost) { - // Adjust dist breakout threshold according to the partition size. - const int64_t dist_breakout_thr = - cpi->sf.partition_search_breakout_dist_thr >> - ((2 * (MAX_SB_SIZE_LOG2 - 2)) - - (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize])); - const int rate_breakout_thr = - cpi->sf.partition_search_breakout_rate_thr * - num_pels_log2_lookup[bsize]; - - best_rdc = this_rdc; - if (bsize_at_least_8x8) pc_tree->partitioning = PARTITION_NONE; - - if ((do_square_split || do_rectangular_split) && - !x->e_mbd.lossless[xd->mi[0]->segment_id] && ctx_none->skippable) { - const int use_ml_based_breakout = - bsize <= cpi->sf.use_square_partition_only_threshold && - bsize > BLOCK_4X4 && xd->bd == 8; - if (use_ml_based_breakout) { - if (ml_predict_breakout(cpi, bsize, x, &this_rdc, - pb_source_variance)) { - do_square_split = 0; - do_rectangular_split = 0; - } - } - - // If all y, u, v transform blocks in this partition are skippable, - // and the dist & rate are within the thresholds, the partition - // search is terminated for current branch of the partition search - // tree. The dist & rate thresholds are set to 0 at speed 0 to - // disable the early termination at that speed. 
- if (best_rdc.dist < dist_breakout_thr && - best_rdc.rate < rate_breakout_thr) { - do_square_split = 0; - do_rectangular_split = 0; - } - } - -#if CONFIG_FP_MB_STATS - // Check if every 16x16 first pass block statistics has zero - // motion and the corresponding first pass residue is small enough. - // If that is the case, check the difference variance between the - // current frame and the last frame. If the variance is small enough, - // stop further splitting in RD optimization - if (cpi->use_fp_mb_stats && do_square_split && - cm->base_qindex > qindex_skip_threshold_lookup[bsize]) { - int mb_row = mi_row >> 1; - int mb_col = mi_col >> 1; - int mb_row_end = - AOMMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows); - int mb_col_end = - AOMMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols); - int r, c; - - int skip = 1; - for (r = mb_row; r < mb_row_end; r++) { - for (c = mb_col; c < mb_col_end; c++) { - const int mb_index = r * cm->mb_cols + c; - if (!(cpi->twopass.this_frame_mb_stats[mb_index] & - FPMB_MOTION_ZERO_MASK) || - !(cpi->twopass.this_frame_mb_stats[mb_index] & - FPMB_ERROR_SMALL_MASK)) { - skip = 0; - break; - } - } - if (skip == 0) { - break; - } - } - if (skip) { - if (src_diff_var == UINT_MAX) { - set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); - src_diff_var = get_sby_perpixel_diff_variance( - cpi, &x->plane[0].src, mi_row, mi_col, bsize); - } - if (src_diff_var < 8) { - do_square_split = 0; - do_rectangular_split = 0; - } - } - } -#endif - } - } - - restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); - } - - // store estimated motion vector - if (cpi->sf.adaptive_motion_search) store_pred_mv(x, ctx_none); - - // PARTITION_SPLIT - if (do_square_split) { - av1_init_rd_stats(&sum_rdc); - subsize = get_partition_subsize(bsize, PARTITION_SPLIT); - sum_rdc.rate = partition_cost[PARTITION_SPLIT]; - sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0); - - int idx; - for (idx = 0; idx < 4 && sum_rdc.rdcost < 
best_rdc.rdcost; ++idx) { - const int x_idx = (idx & 1) * mi_step; - const int y_idx = (idx >> 1) * mi_step; - - if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) - continue; - - if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none); - - pc_tree->split[idx]->index = idx; - int64_t *p_split_rd = &split_rd[idx]; - int64_t best_remain_rdcost = best_rdc.rdcost == INT64_MAX - ? INT64_MAX - : (best_rdc.rdcost - sum_rdc.rdcost); - if (cpi->sf.prune_ref_frame_for_rect_partitions) - pc_tree->split[idx]->none.rate = INT_MAX; - rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx, - subsize, &this_rdc, best_remain_rdcost, - pc_tree->split[idx], p_split_rd); - - if (this_rdc.rate == INT_MAX) { - sum_rdc.rdcost = INT64_MAX; - break; - } else { - sum_rdc.rate += this_rdc.rate; - sum_rdc.dist += this_rdc.dist; - sum_rdc.rdcost += this_rdc.rdcost; - if (cpi->sf.prune_ref_frame_for_rect_partitions && - pc_tree->split[idx]->none.rate != INT_MAX) { - const int ref_type = - av1_ref_frame_type(pc_tree->split[idx]->none.mic.ref_frame); - ref_frames_used[idx] |= (1 << ref_type); - } - if (idx <= 1 && (bsize <= BLOCK_8X8 || - pc_tree->split[idx]->partitioning == PARTITION_NONE)) { - const MB_MODE_INFO *const mbmi = &pc_tree->split[idx]->none.mic; - const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; - // Neither palette mode nor cfl predicted - if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) { - if (mbmi->uv_mode != UV_CFL_PRED) split_ctx_is_ready[idx] = 1; - } - } - } - } - const int reached_last_index = (idx == 4); - - if (reached_last_index && sum_rdc.rdcost < best_rdc.rdcost) { - sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); - - if (sum_rdc.rdcost < best_rdc.rdcost) { - best_rdc = sum_rdc; - pc_tree->partitioning = PARTITION_SPLIT; - } - } else if (cpi->sf.less_rectangular_check_level > 0) { - // skip rectangular partition test when larger block size - // gives better rd cost - if 
(cpi->sf.less_rectangular_check_level == 2 || idx <= 2) - do_rectangular_split &= !partition_none_allowed; - } - - restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); - } // if (do_split) - - pc_tree->horizontal[0].skip_ref_frame_mask = 0; - pc_tree->horizontal[1].skip_ref_frame_mask = 0; - pc_tree->vertical[0].skip_ref_frame_mask = 0; - pc_tree->vertical[1].skip_ref_frame_mask = 0; - if (cpi->sf.prune_ref_frame_for_rect_partitions) { - int used_frames; - used_frames = ref_frames_used[0] | ref_frames_used[1]; - if (used_frames) pc_tree->horizontal[0].skip_ref_frame_mask = ~used_frames; - used_frames = ref_frames_used[2] | ref_frames_used[3]; - if (used_frames) pc_tree->horizontal[1].skip_ref_frame_mask = ~used_frames; - used_frames = ref_frames_used[0] | ref_frames_used[2]; - if (used_frames) pc_tree->vertical[0].skip_ref_frame_mask = ~used_frames; - used_frames = ref_frames_used[1] | ref_frames_used[3]; - if (used_frames) pc_tree->vertical[1].skip_ref_frame_mask = ~used_frames; - } - - int prune_horz = 0; - int prune_vert = 0; - if (cpi->sf.ml_prune_rect_partition && !frame_is_intra_only(cm) && - (partition_horz_allowed || partition_vert_allowed)) { - av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes); - ml_prune_rect_partition(cpi, x, bsize, best_rdc.rdcost, cur_none_rd, - split_rd, &prune_horz, &prune_vert); - } - - // PARTITION_HORZ - if (partition_horz_allowed && !prune_horz && - (do_rectangular_split || active_h_edge(cpi, mi_row, mi_step))) { - av1_init_rd_stats(&sum_rdc); - subsize = get_partition_subsize(bsize, PARTITION_HORZ); - if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none); - if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && - partition_none_allowed) { - pc_tree->horizontal[0].pred_interp_filter = - av1_extract_interp_filter(ctx_none->mic.interp_filters, 0); - } - int64_t best_remain_rdcost = best_rdc.rdcost == INT64_MAX - ? 
INT64_MAX - : (best_rdc.rdcost - sum_rdc.rdcost); - sum_rdc.rate = partition_cost[PARTITION_HORZ]; - sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0); - rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, - PARTITION_HORZ, subsize, &pc_tree->horizontal[0], - best_remain_rdcost); - - if (this_rdc.rate == INT_MAX) { - sum_rdc.rdcost = INT64_MAX; - } else { - sum_rdc.rate += this_rdc.rate; - sum_rdc.dist += this_rdc.dist; - sum_rdc.rdcost += this_rdc.rdcost; - } - horz_rd[0] = this_rdc.rdcost; - - if (sum_rdc.rdcost < best_rdc.rdcost && has_rows) { - const PICK_MODE_CONTEXT *const ctx_h = &pc_tree->horizontal[0]; - const MB_MODE_INFO *const mbmi = &pc_tree->horizontal[0].mic; - const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; - // Neither palette mode nor cfl predicted - if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) { - if (mbmi->uv_mode != UV_CFL_PRED) horz_ctx_is_ready = 1; - } - update_state(cpi, tile_data, td, ctx_h, mi_row, mi_col, subsize, 1); - encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, - subsize, NULL); - - if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_h); - - if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && - partition_none_allowed) { - pc_tree->horizontal[1].pred_interp_filter = - av1_extract_interp_filter(ctx_h->mic.interp_filters, 0); - } - rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc, - PARTITION_HORZ, subsize, &pc_tree->horizontal[1], - best_rdc.rdcost - sum_rdc.rdcost); - horz_rd[1] = this_rdc.rdcost; - - if (this_rdc.rate == INT_MAX) { - sum_rdc.rdcost = INT64_MAX; - } else { - sum_rdc.rate += this_rdc.rate; - sum_rdc.dist += this_rdc.dist; - sum_rdc.rdcost += this_rdc.rdcost; - } - } - - if (sum_rdc.rdcost < best_rdc.rdcost) { - sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); - if (sum_rdc.rdcost < best_rdc.rdcost) { - best_rdc = sum_rdc; - pc_tree->partitioning = PARTITION_HORZ; - } - } - - restore_context(x, 
&x_ctx, mi_row, mi_col, bsize, num_planes); - } - - // PARTITION_VERT - if (partition_vert_allowed && !prune_vert && - (do_rectangular_split || active_v_edge(cpi, mi_col, mi_step))) { - av1_init_rd_stats(&sum_rdc); - subsize = get_partition_subsize(bsize, PARTITION_VERT); - - if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none); - - if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && - partition_none_allowed) { - pc_tree->vertical[0].pred_interp_filter = - av1_extract_interp_filter(ctx_none->mic.interp_filters, 0); - } - sum_rdc.rate = partition_cost[PARTITION_VERT]; - sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0); - int64_t best_remain_rdcost = best_rdc.rdcost == INT64_MAX - ? INT64_MAX - : (best_rdc.rdcost - sum_rdc.rdcost); - rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, - PARTITION_VERT, subsize, &pc_tree->vertical[0], - best_remain_rdcost); - - if (this_rdc.rate == INT_MAX) { - sum_rdc.rdcost = INT64_MAX; - } else { - sum_rdc.rate += this_rdc.rate; - sum_rdc.dist += this_rdc.dist; - sum_rdc.rdcost += this_rdc.rdcost; - } - vert_rd[0] = this_rdc.rdcost; - const int64_t vert_max_rdcost = best_rdc.rdcost; - if (sum_rdc.rdcost < vert_max_rdcost && has_cols) { - const MB_MODE_INFO *const mbmi = &pc_tree->vertical[0].mic; - const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; - // Neither palette mode nor cfl predicted - if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) { - if (mbmi->uv_mode != UV_CFL_PRED) vert_ctx_is_ready = 1; - } - update_state(cpi, tile_data, td, &pc_tree->vertical[0], mi_row, mi_col, - subsize, 1); - encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row, mi_col, - subsize, NULL); - - if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none); - - if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && - partition_none_allowed) { - pc_tree->vertical[1].pred_interp_filter = - av1_extract_interp_filter(ctx_none->mic.interp_filters, 0); - } - 
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc, - PARTITION_VERT, subsize, &pc_tree->vertical[1], - best_rdc.rdcost - sum_rdc.rdcost); - vert_rd[1] = this_rdc.rdcost; - - if (this_rdc.rate == INT_MAX) { - sum_rdc.rdcost = INT64_MAX; - } else { - sum_rdc.rate += this_rdc.rate; - sum_rdc.dist += this_rdc.dist; - sum_rdc.rdcost += this_rdc.rdcost; - } - } - - if (sum_rdc.rdcost < best_rdc.rdcost) { - sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); - if (sum_rdc.rdcost < best_rdc.rdcost) { - best_rdc = sum_rdc; - pc_tree->partitioning = PARTITION_VERT; - } - } - - restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); - } - - if (pb_source_variance == UINT_MAX) { - av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes); - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - pb_source_variance = av1_high_get_sby_perpixel_variance( - cpi, &x->plane[0].src, bsize, xd->bd); - } else { - pb_source_variance = - av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); - } - } - - const int ext_partition_allowed = - do_rectangular_split && bsize > BLOCK_8X8 && partition_none_allowed; - - // The standard AB partitions are allowed whenever ext-partition-types are - // allowed - int horzab_partition_allowed = ext_partition_allowed; - int vertab_partition_allowed = ext_partition_allowed; - -#if CONFIG_DIST_8X8 - if (x->using_dist_8x8) { - if (block_size_high[bsize] <= 8 || block_size_wide[bsize] <= 8) { - horzab_partition_allowed = 0; - vertab_partition_allowed = 0; - } - } -#endif - - if (cpi->sf.prune_ext_partition_types_search_level) { - if (cpi->sf.prune_ext_partition_types_search_level == 1) { - // TODO(debargha,huisu@google.com): may need to tune the threshold for - // pb_source_variance. 
- horzab_partition_allowed &= (pc_tree->partitioning == PARTITION_HORZ || - (pc_tree->partitioning == PARTITION_NONE && - pb_source_variance < 32) || - pc_tree->partitioning == PARTITION_SPLIT); - vertab_partition_allowed &= (pc_tree->partitioning == PARTITION_VERT || - (pc_tree->partitioning == PARTITION_NONE && - pb_source_variance < 32) || - pc_tree->partitioning == PARTITION_SPLIT); - } else { - horzab_partition_allowed &= (pc_tree->partitioning == PARTITION_HORZ || - pc_tree->partitioning == PARTITION_SPLIT); - vertab_partition_allowed &= (pc_tree->partitioning == PARTITION_VERT || - pc_tree->partitioning == PARTITION_SPLIT); - } - horz_rd[0] = (horz_rd[0] < INT64_MAX ? horz_rd[0] : 0); - horz_rd[1] = (horz_rd[1] < INT64_MAX ? horz_rd[1] : 0); - vert_rd[0] = (vert_rd[0] < INT64_MAX ? vert_rd[0] : 0); - vert_rd[1] = (vert_rd[1] < INT64_MAX ? vert_rd[1] : 0); - split_rd[0] = (split_rd[0] < INT64_MAX ? split_rd[0] : 0); - split_rd[1] = (split_rd[1] < INT64_MAX ? split_rd[1] : 0); - split_rd[2] = (split_rd[2] < INT64_MAX ? split_rd[2] : 0); - split_rd[3] = (split_rd[3] < INT64_MAX ? 
split_rd[3] : 0); - } - int horza_partition_allowed = horzab_partition_allowed; - int horzb_partition_allowed = horzab_partition_allowed; - if (cpi->sf.prune_ext_partition_types_search_level) { - const int64_t horz_a_rd = horz_rd[1] + split_rd[0] + split_rd[1]; - const int64_t horz_b_rd = horz_rd[0] + split_rd[2] + split_rd[3]; - switch (cpi->sf.prune_ext_partition_types_search_level) { - case 1: - horza_partition_allowed &= (horz_a_rd / 16 * 14 < best_rdc.rdcost); - horzb_partition_allowed &= (horz_b_rd / 16 * 14 < best_rdc.rdcost); - break; - case 2: - default: - horza_partition_allowed &= (horz_a_rd / 16 * 15 < best_rdc.rdcost); - horzb_partition_allowed &= (horz_b_rd / 16 * 15 < best_rdc.rdcost); - break; - } - } - - int verta_partition_allowed = vertab_partition_allowed; - int vertb_partition_allowed = vertab_partition_allowed; - if (cpi->sf.prune_ext_partition_types_search_level) { - const int64_t vert_a_rd = vert_rd[1] + split_rd[0] + split_rd[2]; - const int64_t vert_b_rd = vert_rd[0] + split_rd[1] + split_rd[3]; - switch (cpi->sf.prune_ext_partition_types_search_level) { - case 1: - verta_partition_allowed &= (vert_a_rd / 16 * 14 < best_rdc.rdcost); - vertb_partition_allowed &= (vert_b_rd / 16 * 14 < best_rdc.rdcost); - break; - case 2: - default: - verta_partition_allowed &= (vert_a_rd / 16 * 15 < best_rdc.rdcost); - vertb_partition_allowed &= (vert_b_rd / 16 * 15 < best_rdc.rdcost); - break; - } - } - - if (cpi->sf.ml_prune_ab_partition && ext_partition_allowed && - partition_horz_allowed && partition_vert_allowed) { - // TODO(huisu@google.com): x->source_variance may not be the current block's - // variance. The correct one to use is pb_source_variance. - // Need to re-train the model to fix it. 
- ml_prune_ab_partition(bsize, pc_tree->partitioning, - get_unsigned_bits(x->source_variance), - best_rdc.rdcost, horz_rd, vert_rd, split_rd, - &horza_partition_allowed, &horzb_partition_allowed, - &verta_partition_allowed, &vertb_partition_allowed); - } - - // PARTITION_HORZ_A - if (partition_horz_allowed && horza_partition_allowed) { - subsize = get_partition_subsize(bsize, PARTITION_HORZ_A); - pc_tree->horizontala[0].rd_mode_is_ready = 0; - pc_tree->horizontala[1].rd_mode_is_ready = 0; - pc_tree->horizontala[2].rd_mode_is_ready = 0; - if (split_ctx_is_ready[0]) { - av1_copy_tree_context(&pc_tree->horizontala[0], &pc_tree->split[0]->none); - pc_tree->horizontala[0].mic.partition = PARTITION_HORZ_A; - pc_tree->horizontala[0].rd_mode_is_ready = 1; - if (split_ctx_is_ready[1]) { - av1_copy_tree_context(&pc_tree->horizontala[1], - &pc_tree->split[1]->none); - pc_tree->horizontala[1].mic.partition = PARTITION_HORZ_A; - pc_tree->horizontala[1].rd_mode_is_ready = 1; - } - } - pc_tree->horizontala[0].skip_ref_frame_mask = 0; - pc_tree->horizontala[1].skip_ref_frame_mask = 0; - pc_tree->horizontala[2].skip_ref_frame_mask = 0; - if (cpi->sf.prune_ref_frame_for_rect_partitions) { - int used_frames; - used_frames = ref_frames_used[0]; - if (used_frames) - pc_tree->horizontala[0].skip_ref_frame_mask = ~used_frames; - used_frames = ref_frames_used[1]; - if (used_frames) - pc_tree->horizontala[1].skip_ref_frame_mask = ~used_frames; - used_frames = ref_frames_used[2] | ref_frames_used[3]; - if (used_frames) - pc_tree->horizontala[2].skip_ref_frame_mask = ~used_frames; - } - rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc, - pc_tree->horizontala, ctx_none, mi_row, mi_col, bsize, - PARTITION_HORZ_A, mi_row, mi_col, bsize2, mi_row, - mi_col + mi_step, bsize2, mi_row + mi_step, mi_col, - subsize); - restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); - } - // PARTITION_HORZ_B - if (partition_horz_allowed && horzb_partition_allowed) { - subsize = 
get_partition_subsize(bsize, PARTITION_HORZ_B); - pc_tree->horizontalb[0].rd_mode_is_ready = 0; - pc_tree->horizontalb[1].rd_mode_is_ready = 0; - pc_tree->horizontalb[2].rd_mode_is_ready = 0; - if (horz_ctx_is_ready) { - av1_copy_tree_context(&pc_tree->horizontalb[0], &pc_tree->horizontal[0]); - pc_tree->horizontalb[0].mic.partition = PARTITION_HORZ_B; - pc_tree->horizontalb[0].rd_mode_is_ready = 1; - } - pc_tree->horizontalb[0].skip_ref_frame_mask = 0; - pc_tree->horizontalb[1].skip_ref_frame_mask = 0; - pc_tree->horizontalb[2].skip_ref_frame_mask = 0; - if (cpi->sf.prune_ref_frame_for_rect_partitions) { - int used_frames; - used_frames = ref_frames_used[0] | ref_frames_used[1]; - if (used_frames) - pc_tree->horizontalb[0].skip_ref_frame_mask = ~used_frames; - used_frames = ref_frames_used[2]; - if (used_frames) - pc_tree->horizontalb[1].skip_ref_frame_mask = ~used_frames; - used_frames = ref_frames_used[3]; - if (used_frames) - pc_tree->horizontalb[2].skip_ref_frame_mask = ~used_frames; - } - rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc, - pc_tree->horizontalb, ctx_none, mi_row, mi_col, bsize, - PARTITION_HORZ_B, mi_row, mi_col, subsize, - mi_row + mi_step, mi_col, bsize2, mi_row + mi_step, - mi_col + mi_step, bsize2); - restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); - } - - // PARTITION_VERT_A - if (partition_vert_allowed && verta_partition_allowed) { - subsize = get_partition_subsize(bsize, PARTITION_VERT_A); - pc_tree->verticala[0].rd_mode_is_ready = 0; - pc_tree->verticala[1].rd_mode_is_ready = 0; - pc_tree->verticala[2].rd_mode_is_ready = 0; - if (split_ctx_is_ready[0]) { - av1_copy_tree_context(&pc_tree->verticala[0], &pc_tree->split[0]->none); - pc_tree->verticala[0].mic.partition = PARTITION_VERT_A; - pc_tree->verticala[0].rd_mode_is_ready = 1; - } - pc_tree->verticala[0].skip_ref_frame_mask = 0; - pc_tree->verticala[1].skip_ref_frame_mask = 0; - pc_tree->verticala[2].skip_ref_frame_mask = 0; - if 
(cpi->sf.prune_ref_frame_for_rect_partitions) { - int used_frames; - used_frames = ref_frames_used[0]; - if (used_frames) pc_tree->verticala[0].skip_ref_frame_mask = ~used_frames; - used_frames = ref_frames_used[2]; - if (used_frames) pc_tree->verticala[1].skip_ref_frame_mask = ~used_frames; - used_frames = ref_frames_used[1] | ref_frames_used[3]; - if (used_frames) pc_tree->verticala[2].skip_ref_frame_mask = ~used_frames; - } - rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc, - pc_tree->verticala, ctx_none, mi_row, mi_col, bsize, - PARTITION_VERT_A, mi_row, mi_col, bsize2, - mi_row + mi_step, mi_col, bsize2, mi_row, - mi_col + mi_step, subsize); - restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); - } - // PARTITION_VERT_B - if (partition_vert_allowed && vertb_partition_allowed) { - subsize = get_partition_subsize(bsize, PARTITION_VERT_B); - pc_tree->verticalb[0].rd_mode_is_ready = 0; - pc_tree->verticalb[1].rd_mode_is_ready = 0; - pc_tree->verticalb[2].rd_mode_is_ready = 0; - if (vert_ctx_is_ready) { - av1_copy_tree_context(&pc_tree->verticalb[0], &pc_tree->vertical[0]); - pc_tree->verticalb[0].mic.partition = PARTITION_VERT_B; - pc_tree->verticalb[0].rd_mode_is_ready = 1; - } - pc_tree->verticalb[0].skip_ref_frame_mask = 0; - pc_tree->verticalb[1].skip_ref_frame_mask = 0; - pc_tree->verticalb[2].skip_ref_frame_mask = 0; - if (cpi->sf.prune_ref_frame_for_rect_partitions) { - int used_frames; - used_frames = ref_frames_used[0] | ref_frames_used[2]; - if (used_frames) pc_tree->verticalb[0].skip_ref_frame_mask = ~used_frames; - used_frames = ref_frames_used[1]; - if (used_frames) pc_tree->verticalb[1].skip_ref_frame_mask = ~used_frames; - used_frames = ref_frames_used[3]; - if (used_frames) pc_tree->verticalb[2].skip_ref_frame_mask = ~used_frames; - } - rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc, - pc_tree->verticalb, ctx_none, mi_row, mi_col, bsize, - PARTITION_VERT_B, mi_row, mi_col, subsize, mi_row, - mi_col + mi_step, 
bsize2, mi_row + mi_step, - mi_col + mi_step, bsize2); - restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); - } - - // partition4_allowed is 1 if we can use a PARTITION_HORZ_4 or - // PARTITION_VERT_4 for this block. This is almost the same as - // ext_partition_allowed, except that we don't allow 128x32 or 32x128 blocks, - // so we require that bsize is not BLOCK_128X128. - const int partition4_allowed = - ext_partition_allowed && bsize != BLOCK_128X128; - int partition_horz4_allowed = partition4_allowed && partition_horz_allowed; - int partition_vert4_allowed = partition4_allowed && partition_vert_allowed; - if (cpi->sf.prune_ext_partition_types_search_level == 2) { - partition_horz4_allowed &= (pc_tree->partitioning == PARTITION_HORZ || - pc_tree->partitioning == PARTITION_HORZ_A || - pc_tree->partitioning == PARTITION_HORZ_B || - pc_tree->partitioning == PARTITION_SPLIT || - pc_tree->partitioning == PARTITION_NONE); - partition_vert4_allowed &= (pc_tree->partitioning == PARTITION_VERT || - pc_tree->partitioning == PARTITION_VERT_A || - pc_tree->partitioning == PARTITION_VERT_B || - pc_tree->partitioning == PARTITION_SPLIT || - pc_tree->partitioning == PARTITION_NONE); - } - if (cpi->sf.ml_prune_4_partition && partition4_allowed && - partition_horz_allowed && partition_vert_allowed) { - ml_prune_4_partition(cpi, x, bsize, pc_tree->partitioning, best_rdc.rdcost, - horz_rd, vert_rd, split_rd, &partition_horz4_allowed, - &partition_vert4_allowed, pb_source_variance, mi_row, - mi_col); - } - -#if CONFIG_DIST_8X8 - if (x->using_dist_8x8) { - if (block_size_high[bsize] <= 16 || block_size_wide[bsize] <= 16) { - partition_horz4_allowed = 0; - partition_vert4_allowed = 0; - } - } -#endif - - // PARTITION_HORZ_4 - if (partition_horz4_allowed && has_rows && - (do_rectangular_split || active_h_edge(cpi, mi_row, mi_step))) { - av1_init_rd_stats(&sum_rdc); - const int quarter_step = mi_size_high[bsize] / 4; - PICK_MODE_CONTEXT *ctx_prev = ctx_none; - - subsize = 
get_partition_subsize(bsize, PARTITION_HORZ_4); - sum_rdc.rate = partition_cost[PARTITION_HORZ_4]; - sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0); - - for (int i = 0; i < 4; ++i) { - const int this_mi_row = mi_row + i * quarter_step; - - if (i > 0 && this_mi_row >= cm->mi_rows) break; - - PICK_MODE_CONTEXT *ctx_this = &pc_tree->horizontal4[i]; - - ctx_this->rd_mode_is_ready = 0; - ctx_this->skip_ref_frame_mask = 0; - if (cpi->sf.prune_ref_frame_for_rect_partitions) { - const int used_frames = i <= 1 - ? (ref_frames_used[0] | ref_frames_used[1]) - : (ref_frames_used[2] | ref_frames_used[3]); - if (used_frames) ctx_this->skip_ref_frame_mask = ~used_frames; - } - if (!rd_try_subblock(cpi, td, tile_data, tp, (i == 3), this_mi_row, - mi_col, subsize, &best_rdc, &sum_rdc, &this_rdc, - PARTITION_HORZ_4, ctx_prev, ctx_this)) - break; - - ctx_prev = ctx_this; - } - - if (sum_rdc.rdcost < best_rdc.rdcost) { - sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); - if (sum_rdc.rdcost < best_rdc.rdcost) { - best_rdc = sum_rdc; - pc_tree->partitioning = PARTITION_HORZ_4; - } - } - restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); - } - - // PARTITION_VERT_4 - if (partition_vert4_allowed && has_cols && - (do_rectangular_split || active_v_edge(cpi, mi_row, mi_step))) { - av1_init_rd_stats(&sum_rdc); - const int quarter_step = mi_size_wide[bsize] / 4; - PICK_MODE_CONTEXT *ctx_prev = ctx_none; - - subsize = get_partition_subsize(bsize, PARTITION_VERT_4); - sum_rdc.rate = partition_cost[PARTITION_VERT_4]; - sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0); - - for (int i = 0; i < 4; ++i) { - const int this_mi_col = mi_col + i * quarter_step; - - if (i > 0 && this_mi_col >= cm->mi_cols) break; - - PICK_MODE_CONTEXT *ctx_this = &pc_tree->vertical4[i]; - - ctx_this->rd_mode_is_ready = 0; - ctx_this->skip_ref_frame_mask = 0; - if (cpi->sf.prune_ref_frame_for_rect_partitions) { - const int used_frames = i <= 1 - ? 
(ref_frames_used[0] | ref_frames_used[2]) - : (ref_frames_used[1] | ref_frames_used[3]); - if (used_frames) ctx_this->skip_ref_frame_mask = ~used_frames; - } - if (!rd_try_subblock(cpi, td, tile_data, tp, (i == 3), mi_row, - this_mi_col, subsize, &best_rdc, &sum_rdc, &this_rdc, - PARTITION_VERT_4, ctx_prev, ctx_this)) - break; - - ctx_prev = ctx_this; - } - - if (sum_rdc.rdcost < best_rdc.rdcost) { - sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist); - if (sum_rdc.rdcost < best_rdc.rdcost) { - best_rdc = sum_rdc; - pc_tree->partitioning = PARTITION_VERT_4; - } - } - restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes); - } - - if (bsize == cm->seq_params.sb_size && best_rdc.rate == INT_MAX) { - // Did not find a valid partition, go back and search again, with less - // constraint on which partition types to search. - x->must_find_valid_partition = 1; - goto BEGIN_PARTITION_SEARCH; - } - - // TODO(jbb): This code added so that we avoid static analysis - // warning related to the fact that best_rd isn't used after this - // point. This code should be refactored so that the duplicate - // checks occur in some sub function and thus are used... - (void)best_rd; - *rd_cost = best_rdc; - - if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX && - pc_tree->index != 3) { - if (bsize == cm->seq_params.sb_size) { - x->cb_offset = 0; - encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize, - pc_tree, NULL); - } else { - encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize, - pc_tree, NULL); - } - } - - if (bsize == cm->seq_params.sb_size) { - assert(best_rdc.rate < INT_MAX); - assert(best_rdc.dist < INT64_MAX); - } else { - assert(tp_orig == *tp); - } -} - -// Set all the counters as max. 
-static void init_first_partition_pass_stats_tables( - FIRST_PARTITION_PASS_STATS *stats) { - for (int i = 0; i < FIRST_PARTITION_PASS_STATS_TABLES; ++i) { - memset(stats[i].ref0_counts, 0xff, sizeof(stats[i].ref0_counts)); - memset(stats[i].ref1_counts, 0xff, sizeof(stats[i].ref1_counts)); - stats[i].sample_counts = INT_MAX; - } -} - -// clear pc_tree_stats -static INLINE void clear_pc_tree_stats(PC_TREE *pt) { - if (pt == NULL) return; - pt->pc_tree_stats.valid = 0; - for (int i = 0; i < 4; ++i) { - clear_pc_tree_stats(pt->split[i]); - } -} - -// Minimum number of samples to trigger the -// mode_pruning_based_on_two_pass_partition_search feature. -#define FIRST_PARTITION_PASS_MIN_SAMPLES 16 - -static void encode_rd_sb_row(AV1_COMP *cpi, ThreadData *td, - TileDataEnc *tile_data, int mi_row, - TOKENEXTRA **tp) { - AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - const TileInfo *const tile_info = &tile_data->tile_info; - MACROBLOCK *const x = &td->mb; - MACROBLOCKD *const xd = &x->e_mbd; - SPEED_FEATURES *const sf = &cpi->sf; - const int leaf_nodes = 256; - - // Initialize the left context for the new SB row - av1_zero_left_context(xd); - - // Reset delta for every tile - if (mi_row == tile_info->mi_row_start) { - if (cm->delta_q_present_flag) xd->current_qindex = cm->base_qindex; - if (cm->delta_lf_present_flag) { - av1_reset_loop_filter_delta(xd, av1_num_planes(cm)); - } - } - - PC_TREE *const pc_root = - td->pc_root[cm->seq_params.mib_size_log2 - MIN_MIB_SIZE_LOG2]; - // Code each SB in the row - for (int mi_col = tile_info->mi_col_start; mi_col < tile_info->mi_col_end; - mi_col += cm->seq_params.mib_size) { - av1_fill_coeff_costs(&td->mb, xd->tile_ctx, num_planes); - av1_fill_mode_rates(cm, x, xd->tile_ctx); - - if (sf->adaptive_pred_interp_filter) { - for (int i = 0; i < leaf_nodes; ++i) { - td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE; - td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE; - 
td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE; - td->pc_tree[i].horizontal[1].pred_interp_filter = SWITCHABLE; - } - } - - x->mb_rd_record.num = x->mb_rd_record.index_start = 0; - - av1_zero(x->txb_rd_record_8X8); - av1_zero(x->txb_rd_record_16X16); - av1_zero(x->txb_rd_record_32X32); - av1_zero(x->txb_rd_record_64X64); - av1_zero(x->txb_rd_record_intra); - - av1_zero(x->pred_mv); - pc_root->index = 0; - - const struct segmentation *const seg = &cm->seg; - int seg_skip = 0; - if (seg->enabled) { - const uint8_t *const map = - seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; - const int segment_id = - map ? get_segment_id(cm, map, cm->seq_params.sb_size, mi_row, mi_col) - : 0; - seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP); - } - xd->cur_frame_force_integer_mv = cm->cur_frame_force_integer_mv; - - x->sb_energy_level = 0; - if (cm->delta_q_present_flag) { - // Delta-q modulation based on variance - av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes); - - int offset_qindex; - if (DELTAQ_MODULATION == 1) { - const int block_wavelet_energy_level = - av1_block_wavelet_energy_level(cpi, x, cm->seq_params.sb_size); - x->sb_energy_level = block_wavelet_energy_level; - offset_qindex = av1_compute_deltaq_from_energy_level( - cpi, block_wavelet_energy_level); - } else { - const int block_var_level = - av1_log_block_var(cpi, x, cm->seq_params.sb_size); - x->sb_energy_level = block_var_level; - offset_qindex = - av1_compute_deltaq_from_energy_level(cpi, block_var_level); - } - const int qmask = ~(cm->delta_q_res - 1); - int current_qindex = clamp(cm->base_qindex + offset_qindex, - cm->delta_q_res, 256 - cm->delta_q_res); - current_qindex = - ((current_qindex - cm->base_qindex + cm->delta_q_res / 2) & qmask) + - cm->base_qindex; - assert(current_qindex > 0); - - xd->delta_qindex = current_qindex - cm->base_qindex; - set_offsets(cpi, tile_info, x, mi_row, mi_col, cm->seq_params.sb_size); - xd->mi[0]->current_qindex = 
current_qindex; - av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id); - if (cpi->oxcf.deltaq_mode == DELTA_Q_LF) { - const int lfmask = ~(cm->delta_lf_res - 1); - const int delta_lf_from_base = - ((offset_qindex / 2 + cm->delta_lf_res / 2) & lfmask); - - // pre-set the delta lf for loop filter. Note that this value is set - // before mi is assigned for each block in current superblock - for (int j = 0; - j < AOMMIN(cm->seq_params.mib_size, cm->mi_rows - mi_row); j++) { - for (int k = 0; - k < AOMMIN(cm->seq_params.mib_size, cm->mi_cols - mi_col); k++) { - cm->mi[(mi_row + j) * cm->mi_stride + (mi_col + k)] - .delta_lf_from_base = - clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER); - const int frame_lf_count = - av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2; - for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) { - cm->mi[(mi_row + j) * cm->mi_stride + (mi_col + k)] - .delta_lf[lf_id] = - clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER); - } - } - } - } - } - - int dummy_rate; - int64_t dummy_dist; - RD_STATS dummy_rdc; - const int idx_str = cm->mi_stride * mi_row + mi_col; - MB_MODE_INFO **mi = cm->mi_grid_visible + idx_str; - x->source_variance = UINT_MAX; - if (sf->partition_search_type == FIXED_PARTITION || seg_skip) { - set_offsets(cpi, tile_info, x, mi_row, mi_col, cm->seq_params.sb_size); - const BLOCK_SIZE bsize = - seg_skip ? 
cm->seq_params.sb_size : sf->always_this_block_size; - set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); - rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, - cm->seq_params.sb_size, &dummy_rate, &dummy_dist, 1, - pc_root); - } else if (cpi->partition_search_skippable_frame) { - set_offsets(cpi, tile_info, x, mi_row, mi_col, cm->seq_params.sb_size); - const BLOCK_SIZE bsize = - get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col); - set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); - rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, - cm->seq_params.sb_size, &dummy_rate, &dummy_dist, 1, - pc_root); - } else { - // If required set upper and lower partition size limits - if (sf->auto_min_max_partition_size) { - set_offsets(cpi, tile_info, x, mi_row, mi_col, cm->seq_params.sb_size); - rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col, - &x->min_partition_size, &x->max_partition_size); - } - - reset_partition(pc_root, cm->seq_params.sb_size); - x->use_cb_search_range = 0; - init_first_partition_pass_stats_tables(x->first_partition_pass_stats); - // Do the first pass if we need two pass partition search - if (cpi->sf.two_pass_partition_search && - cpi->sf.use_square_partition_only_threshold > BLOCK_4X4 && - mi_row + mi_size_high[cm->seq_params.sb_size] < cm->mi_rows && - mi_col + mi_size_wide[cm->seq_params.sb_size] < cm->mi_cols && - cm->frame_type != KEY_FRAME) { - x->cb_partition_scan = 1; - // Reset the stats tables. 
- if (sf->mode_pruning_based_on_two_pass_partition_search) - av1_zero(x->first_partition_pass_stats); - clear_pc_tree_stats(pc_root); - rd_pick_sqr_partition(cpi, td, tile_data, tp, mi_row, mi_col, - cm->seq_params.sb_size, &dummy_rdc, INT64_MAX, - pc_root, NULL); - x->cb_partition_scan = 0; - - x->source_variance = UINT_MAX; - if (sf->adaptive_pred_interp_filter) { - for (int i = 0; i < leaf_nodes; ++i) { - td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE; - td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE; - td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE; - td->pc_tree[i].horizontal[1].pred_interp_filter = SWITCHABLE; - } - } - - x->mb_rd_record.num = x->mb_rd_record.index_start = 0; - av1_zero(x->txb_rd_record_8X8); - av1_zero(x->txb_rd_record_16X16); - av1_zero(x->txb_rd_record_32X32); - av1_zero(x->txb_rd_record_64X64); - av1_zero(x->txb_rd_record_intra); - av1_zero(x->pred_mv); - pc_root->index = 0; - - for (int idy = 0; idy < mi_size_high[cm->seq_params.sb_size]; ++idy) { - for (int idx = 0; idx < mi_size_wide[cm->seq_params.sb_size]; ++idx) { - const int offset = cm->mi_stride * (mi_row + idy) + (mi_col + idx); - cm->mi_grid_visible[offset] = 0; - } - } - - x->use_cb_search_range = 1; - - if (sf->mode_pruning_based_on_two_pass_partition_search) { - for (int i = 0; i < FIRST_PARTITION_PASS_STATS_TABLES; ++i) { - FIRST_PARTITION_PASS_STATS *const stat = - &x->first_partition_pass_stats[i]; - if (stat->sample_counts < FIRST_PARTITION_PASS_MIN_SAMPLES) { - // If there are not enough samples collected, make all available. - memset(stat->ref0_counts, 0xff, sizeof(stat->ref0_counts)); - memset(stat->ref1_counts, 0xff, sizeof(stat->ref1_counts)); - } else if (sf->selective_ref_frame < 2) { - // ALTREF2_FRAME and BWDREF_FRAME may be skipped during the - // initial partition scan, so we don't eliminate them. 
- stat->ref0_counts[ALTREF2_FRAME] = 0xff; - stat->ref1_counts[ALTREF2_FRAME] = 0xff; - stat->ref0_counts[BWDREF_FRAME] = 0xff; - stat->ref1_counts[BWDREF_FRAME] = 0xff; - } - } - } - } - - rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, - cm->seq_params.sb_size, &dummy_rdc, INT64_MAX, pc_root, - NULL); - } -#if CONFIG_COLLECT_INTER_MODE_RD_STATS - // TODO(angiebird): Let inter_mode_rd_model_estimation support multi-tile. - if (cpi->sf.inter_mode_rd_model_estimation && cm->tile_cols == 1 && - cm->tile_rows == 1) { - av1_inter_mode_data_fit(tile_data, x->rdmult); - } -#endif - } -} - -static void init_encode_frame_mb_context(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - MACROBLOCK *const x = &cpi->td.mb; - MACROBLOCKD *const xd = &x->e_mbd; - - // Copy data over into macro block data structures. - av1_setup_src_planes(x, cpi->source, 0, 0, num_planes); - - av1_setup_block_planes(xd, cm->seq_params.subsampling_x, - cm->seq_params.subsampling_y, num_planes); -} - -static MV_REFERENCE_FRAME get_frame_type(const AV1_COMP *cpi) { - if (frame_is_intra_only(&cpi->common)) return INTRA_FRAME; - // We will not update the golden frame with an internal overlay frame - else if ((cpi->rc.is_src_frame_alt_ref && cpi->refresh_golden_frame) || - cpi->rc.is_src_frame_ext_arf) - return ALTREF_FRAME; - else if (cpi->refresh_golden_frame || cpi->refresh_alt2_ref_frame || - cpi->refresh_alt_ref_frame) - return GOLDEN_FRAME; - else - // TODO(zoeliu): To investigate whether a frame_type other than - // INTRA/ALTREF/GOLDEN/LAST needs to be specified seperately. 
- return LAST_FRAME; -} - -static TX_MODE select_tx_mode(const AV1_COMP *cpi) { - if (cpi->common.coded_lossless) return ONLY_4X4; - if (cpi->sf.tx_size_search_method == USE_LARGESTALL) - return TX_MODE_LARGEST; - else if (cpi->sf.tx_size_search_method == USE_FULL_RD || - cpi->sf.tx_size_search_method == USE_FAST_RD) - return TX_MODE_SELECT; - else - return cpi->common.tx_mode; -} - -void av1_alloc_tile_data(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - const int tile_cols = cm->tile_cols; - const int tile_rows = cm->tile_rows; - int tile_col, tile_row; - - if (cpi->tile_data != NULL) aom_free(cpi->tile_data); - CHECK_MEM_ERROR( - cm, cpi->tile_data, - aom_memalign(32, tile_cols * tile_rows * sizeof(*cpi->tile_data))); - cpi->allocated_tiles = tile_cols * tile_rows; - - for (tile_row = 0; tile_row < tile_rows; ++tile_row) - for (tile_col = 0; tile_col < tile_cols; ++tile_col) { - TileDataEnc *const tile_data = - &cpi->tile_data[tile_row * tile_cols + tile_col]; - int i, j; - for (i = 0; i < BLOCK_SIZES_ALL; ++i) { - for (j = 0; j < MAX_MODES; ++j) { - tile_data->thresh_freq_fact[i][j] = 32; - tile_data->mode_map[i][j] = j; - } - } - } -} - -void av1_init_tile_data(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - const int tile_cols = cm->tile_cols; - const int tile_rows = cm->tile_rows; - int tile_col, tile_row; - TOKENEXTRA *pre_tok = cpi->tile_tok[0][0]; - TOKENLIST *tplist = cpi->tplist[0][0]; - unsigned int tile_tok = 0; - int tplist_count = 0; - - for (tile_row = 0; tile_row < tile_rows; ++tile_row) { - for (tile_col = 0; tile_col < tile_cols; ++tile_col) { - TileDataEnc *const tile_data = - &cpi->tile_data[tile_row * tile_cols + tile_col]; - TileInfo *const tile_info = &tile_data->tile_info; - av1_tile_init(tile_info, cm, tile_row, tile_col); - - cpi->tile_tok[tile_row][tile_col] = pre_tok + tile_tok; - pre_tok = cpi->tile_tok[tile_row][tile_col]; - tile_tok = allocated_tokens( - *tile_info, 
cm->seq_params.mib_size_log2 + MI_SIZE_LOG2, num_planes); - cpi->tplist[tile_row][tile_col] = tplist + tplist_count; - tplist = cpi->tplist[tile_row][tile_col]; - tplist_count = av1_get_sb_rows_in_tile(cm, tile_data->tile_info); - tile_data->allow_update_cdf = !cm->large_scale_tile; - tile_data->allow_update_cdf = - tile_data->allow_update_cdf && !cm->disable_cdf_update; - } - } -} - -void av1_encode_sb_row(AV1_COMP *cpi, ThreadData *td, int tile_row, - int tile_col, int mi_row) { - AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - const int tile_cols = cm->tile_cols; - TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; - const TileInfo *const tile_info = &this_tile->tile_info; - TOKENEXTRA *tok = NULL; - int sb_row_in_tile; - int tile_mb_cols = (tile_info->mi_col_end - tile_info->mi_col_start + 2) >> 2; - - int num_mb_rows_in_sb = - ((1 << (cm->seq_params.mib_size_log2 + MI_SIZE_LOG2)) + 8) >> 4; - - sb_row_in_tile = - (mi_row - tile_info->mi_row_start) >> cm->seq_params.mib_size_log2; - - get_start_tok(cpi, tile_row, tile_col, mi_row, &tok, - cm->seq_params.mib_size_log2 + MI_SIZE_LOG2, num_planes); - cpi->tplist[tile_row][tile_col][sb_row_in_tile].start = tok; - - encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok); - - cpi->tplist[tile_row][tile_col][sb_row_in_tile].stop = tok; - cpi->tplist[tile_row][tile_col][sb_row_in_tile].count = - (unsigned int)(cpi->tplist[tile_row][tile_col][sb_row_in_tile].stop - - cpi->tplist[tile_row][tile_col][sb_row_in_tile].start); - - assert( - (unsigned int)(tok - - cpi->tplist[tile_row][tile_col][sb_row_in_tile].start) <= - get_token_alloc(num_mb_rows_in_sb, tile_mb_cols, - cm->seq_params.mib_size_log2 + MI_SIZE_LOG2, num_planes)); - - (void)tile_mb_cols; - (void)num_mb_rows_in_sb; -} - -void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row, - int tile_col) { - AV1_COMMON *const cm = &cpi->common; - TileDataEnc *const this_tile = - &cpi->tile_data[tile_row * 
cm->tile_cols + tile_col]; - const TileInfo *const tile_info = &this_tile->tile_info; - int mi_row; - -#if CONFIG_COLLECT_INTER_MODE_RD_STATS - av1_inter_mode_data_init(this_tile); -#endif - - av1_zero_above_context(cm, &td->mb.e_mbd, tile_info->mi_col_start, - tile_info->mi_col_end, tile_row); - av1_init_above_context(cm, &td->mb.e_mbd, tile_row); - - // Set up pointers to per thread motion search counters. - this_tile->m_search_count = 0; // Count of motion search hits. - this_tile->ex_search_count = 0; // Exhaustive mesh search hits. - td->mb.m_search_count_ptr = &this_tile->m_search_count; - td->mb.ex_search_count_ptr = &this_tile->ex_search_count; - this_tile->tctx = *cm->fc; - td->mb.e_mbd.tile_ctx = &this_tile->tctx; - - cfl_init(&td->mb.e_mbd.cfl, &cm->seq_params); - - av1_crc32c_calculator_init(&td->mb.mb_rd_record.crc_calculator); - - td->intrabc_used_this_tile = 0; - - for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end; - mi_row += cm->seq_params.mib_size) { - av1_encode_sb_row(cpi, td, tile_row, tile_col, mi_row); - } -} - -static void encode_tiles(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - const int tile_cols = cm->tile_cols; - const int tile_rows = cm->tile_rows; - int tile_col, tile_row; - - if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) - av1_alloc_tile_data(cpi); - - av1_init_tile_data(cpi); - - for (tile_row = 0; tile_row < tile_rows; ++tile_row) { - for (tile_col = 0; tile_col < tile_cols; ++tile_col) { - av1_encode_tile(cpi, &cpi->td, tile_row, tile_col); - cpi->intrabc_used |= cpi->td.intrabc_used_this_tile; - } - } -} - -#if CONFIG_FP_MB_STATS -static int input_fpmb_stats(FIRSTPASS_MB_STATS *firstpass_mb_stats, - AV1_COMMON *cm, uint8_t **this_frame_mb_stats) { - uint8_t *mb_stats_in = firstpass_mb_stats->mb_stats_start + - cm->current_video_frame * cm->MBs * sizeof(uint8_t); - - if (mb_stats_in > firstpass_mb_stats->mb_stats_end) return EOF; - - *this_frame_mb_stats = mb_stats_in; 
- - return 1; -} -#endif - -#define GLOBAL_TRANS_TYPES_ENC 3 // highest motion model to search -static int gm_get_params_cost(const WarpedMotionParams *gm, - const WarpedMotionParams *ref_gm, int allow_hp) { - int params_cost = 0; - int trans_bits, trans_prec_diff; - switch (gm->wmtype) { - case AFFINE: - case ROTZOOM: - params_cost += aom_count_signed_primitive_refsubexpfin( - GM_ALPHA_MAX + 1, SUBEXPFIN_K, - (ref_gm->wmmat[2] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS), - (gm->wmmat[2] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS)); - params_cost += aom_count_signed_primitive_refsubexpfin( - GM_ALPHA_MAX + 1, SUBEXPFIN_K, - (ref_gm->wmmat[3] >> GM_ALPHA_PREC_DIFF), - (gm->wmmat[3] >> GM_ALPHA_PREC_DIFF)); - if (gm->wmtype >= AFFINE) { - params_cost += aom_count_signed_primitive_refsubexpfin( - GM_ALPHA_MAX + 1, SUBEXPFIN_K, - (ref_gm->wmmat[4] >> GM_ALPHA_PREC_DIFF), - (gm->wmmat[4] >> GM_ALPHA_PREC_DIFF)); - params_cost += aom_count_signed_primitive_refsubexpfin( - GM_ALPHA_MAX + 1, SUBEXPFIN_K, - (ref_gm->wmmat[5] >> GM_ALPHA_PREC_DIFF) - - (1 << GM_ALPHA_PREC_BITS), - (gm->wmmat[5] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS)); - } - AOM_FALLTHROUGH_INTENDED; - case TRANSLATION: - trans_bits = (gm->wmtype == TRANSLATION) - ? GM_ABS_TRANS_ONLY_BITS - !allow_hp - : GM_ABS_TRANS_BITS; - trans_prec_diff = (gm->wmtype == TRANSLATION) - ? 
GM_TRANS_ONLY_PREC_DIFF + !allow_hp - : GM_TRANS_PREC_DIFF; - params_cost += aom_count_signed_primitive_refsubexpfin( - (1 << trans_bits) + 1, SUBEXPFIN_K, - (ref_gm->wmmat[0] >> trans_prec_diff), - (gm->wmmat[0] >> trans_prec_diff)); - params_cost += aom_count_signed_primitive_refsubexpfin( - (1 << trans_bits) + 1, SUBEXPFIN_K, - (ref_gm->wmmat[1] >> trans_prec_diff), - (gm->wmmat[1] >> trans_prec_diff)); - AOM_FALLTHROUGH_INTENDED; - case IDENTITY: break; - default: assert(0); - } - return (params_cost << AV1_PROB_COST_SHIFT); -} - -static int do_gm_search_logic(SPEED_FEATURES *const sf, int num_refs_using_gm, - int frame) { - (void)num_refs_using_gm; - (void)frame; - switch (sf->gm_search_type) { - case GM_FULL_SEARCH: return 1; - case GM_REDUCED_REF_SEARCH: - return !(frame == LAST2_FRAME || frame == LAST3_FRAME); - case GM_DISABLE_SEARCH: return 0; - default: assert(0); - } - return 1; -} - -// Estimate if the source frame is screen content, based on the portion of -// blocks that have no more than 4 (experimentally selected) luma colors. -static int is_screen_content(const uint8_t *src, int use_hbd, int bd, - int stride, int width, int height) { - assert(src != NULL); - int counts = 0; - const int blk_w = 16; - const int blk_h = 16; - const int limit = 4; - for (int r = 0; r + blk_h <= height; r += blk_h) { - for (int c = 0; c + blk_w <= width; c += blk_w) { - int count_buf[1 << 12]; // Maximum (1 << 12) color levels. - const int n_colors = - use_hbd ? av1_count_colors_highbd(src + r * stride + c, stride, blk_w, - blk_h, bd, count_buf) - : av1_count_colors(src + r * stride + c, stride, blk_w, blk_h, - count_buf); - if (n_colors > 1 && n_colors <= limit) counts++; - } - } - // The threshold is 10%. 
- return counts * blk_h * blk_w * 10 > width * height; -} - -static const uint8_t ref_frame_flag_list[REF_FRAMES] = { 0, - AOM_LAST_FLAG, - AOM_LAST2_FLAG, - AOM_LAST3_FLAG, - AOM_GOLD_FLAG, - AOM_BWD_FLAG, - AOM_ALT2_FLAG, - AOM_ALT_FLAG }; - -// Enforce the number of references for each arbitrary frame limited to -// (INTER_REFS_PER_FRAME - 1) -static void enforce_max_ref_frames(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - MV_REFERENCE_FRAME ref_frame; - int total_valid_refs = 0; - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { - if (cpi->ref_frame_flags & ref_frame_flag_list[ref_frame]) - total_valid_refs++; - } - - // NOTE(zoeliu): When all the possible reference frames are availble, we - // reduce the number of reference frames by 1, following the rules of: - // (1) Retain GOLDEN_FARME/ALTEF_FRAME; - // (2) Check the earliest 2 remaining reference frames, and remove the one - // with the lower quality factor, otherwise if both have been coded at - // the same quality level, remove the earliest reference frame. - - if (total_valid_refs == INTER_REFS_PER_FRAME) { - unsigned int min_ref_offset = UINT_MAX; - unsigned int second_min_ref_offset = UINT_MAX; - MV_REFERENCE_FRAME earliest_ref_frames[2] = { LAST3_FRAME, LAST2_FRAME }; - int earliest_buf_idxes[2] = { 0 }; - - // Locate the earliest two reference frames except GOLDEN/ALTREF. 
- for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { - // Retain GOLDEN/ALTERF - if (ref_frame == GOLDEN_FRAME || ref_frame == ALTREF_FRAME) continue; - - const int buf_idx = cm->frame_refs[ref_frame - LAST_FRAME].idx; - if (buf_idx >= 0) { - const unsigned int ref_offset = - cm->buffer_pool->frame_bufs[buf_idx].cur_frame_offset; - - if (min_ref_offset == UINT_MAX) { - min_ref_offset = ref_offset; - earliest_ref_frames[0] = ref_frame; - earliest_buf_idxes[0] = buf_idx; - } else { - if (get_relative_dist(cm, ref_offset, min_ref_offset) < 0) { - second_min_ref_offset = min_ref_offset; - earliest_ref_frames[1] = earliest_ref_frames[0]; - earliest_buf_idxes[1] = earliest_buf_idxes[0]; - - min_ref_offset = ref_offset; - earliest_ref_frames[0] = ref_frame; - earliest_buf_idxes[0] = buf_idx; - } else if (second_min_ref_offset == UINT_MAX || - get_relative_dist(cm, ref_offset, second_min_ref_offset) < - 0) { - second_min_ref_offset = ref_offset; - earliest_ref_frames[1] = ref_frame; - earliest_buf_idxes[1] = buf_idx; - } - } - } - } - // Check the coding quality factors of the two earliest reference frames. - RATE_FACTOR_LEVEL ref_rf_level[2]; - double ref_rf_deltas[2]; - for (int i = 0; i < 2; ++i) { - ref_rf_level[i] = cpi->frame_rf_level[earliest_buf_idxes[i]]; - ref_rf_deltas[i] = rate_factor_deltas[ref_rf_level[i]]; - } - (void)ref_rf_level; - (void)ref_rf_deltas; - -#define USE_RF_LEVEL_TO_ENFORCE 1 -#if USE_RF_LEVEL_TO_ENFORCE - // If both earliest two reference frames are coded using the same rate- - // factor, disable the earliest reference frame; Otherwise disable the - // reference frame that uses a lower rate-factor delta. - const MV_REFERENCE_FRAME ref_frame_to_disable = - (ref_rf_deltas[0] <= ref_rf_deltas[1]) ? 
earliest_ref_frames[0] - : earliest_ref_frames[1]; -#else - // Always disable the earliest reference frame - const MV_REFERENCE_FRAME ref_frame_to_disable = earliest_ref_frames[0]; -#endif // USE_RF_LEVEL_TO_ENFORCE -#undef USE_RF_LEVEL_TO_ENFORCE - - switch (ref_frame_to_disable) { - case LAST_FRAME: cpi->ref_frame_flags &= ~AOM_LAST_FLAG; break; - case LAST2_FRAME: cpi->ref_frame_flags &= ~AOM_LAST2_FLAG; break; - case LAST3_FRAME: cpi->ref_frame_flags &= ~AOM_LAST3_FLAG; break; - case BWDREF_FRAME: cpi->ref_frame_flags &= ~AOM_BWD_FLAG; break; - case ALTREF2_FRAME: cpi->ref_frame_flags &= ~AOM_ALT2_FLAG; break; - default: break; - } - } -} - -static INLINE int av1_refs_are_one_sided(const AV1_COMMON *cm) { - assert(!frame_is_intra_only(cm)); - - int one_sided_refs = 1; - for (int ref = 0; ref < INTER_REFS_PER_FRAME; ++ref) { - const int buf_idx = cm->frame_refs[ref].idx; - if (buf_idx == INVALID_IDX) continue; - - const int ref_offset = - cm->buffer_pool->frame_bufs[buf_idx].cur_frame_offset; - if (get_relative_dist(cm, ref_offset, (int)cm->frame_offset) > 0) { - one_sided_refs = 0; // bwd reference - break; - } - } - return one_sided_refs; -} - -static INLINE void get_skip_mode_ref_offsets(const AV1_COMMON *cm, - int ref_offset[2]) { - ref_offset[0] = ref_offset[1] = 0; - if (!cm->is_skip_mode_allowed) return; - - const int buf_idx_0 = cm->frame_refs[cm->ref_frame_idx_0].idx; - const int buf_idx_1 = cm->frame_refs[cm->ref_frame_idx_1].idx; - assert(buf_idx_0 != INVALID_IDX && buf_idx_1 != INVALID_IDX); - - ref_offset[0] = cm->buffer_pool->frame_bufs[buf_idx_0].cur_frame_offset; - ref_offset[1] = cm->buffer_pool->frame_bufs[buf_idx_1].cur_frame_offset; -} - -static int check_skip_mode_enabled(AV1_COMP *const cpi) { - AV1_COMMON *const cm = &cpi->common; - - av1_setup_skip_mode_allowed(cm); - if (!cm->is_skip_mode_allowed) return 0; - - // Turn off skip mode if the temporal distances of the reference pair to the - // current frame are different by more than 1 
frame. - const int cur_offset = (int)cm->frame_offset; - int ref_offset[2]; - get_skip_mode_ref_offsets(cm, ref_offset); - const int cur_to_ref0 = get_relative_dist(cm, cur_offset, ref_offset[0]); - const int cur_to_ref1 = abs(get_relative_dist(cm, cur_offset, ref_offset[1])); - if (abs(cur_to_ref0 - cur_to_ref1) > 1) return 0; - - // High Latency: Turn off skip mode if all refs are fwd. - if (cpi->all_one_sided_refs && cpi->oxcf.lag_in_frames > 0) return 0; - - static const int flag_list[REF_FRAMES] = { 0, - AOM_LAST_FLAG, - AOM_LAST2_FLAG, - AOM_LAST3_FLAG, - AOM_GOLD_FLAG, - AOM_BWD_FLAG, - AOM_ALT2_FLAG, - AOM_ALT_FLAG }; - const int ref_frame[2] = { cm->ref_frame_idx_0 + LAST_FRAME, - cm->ref_frame_idx_1 + LAST_FRAME }; - if (!(cpi->ref_frame_flags & flag_list[ref_frame[0]]) || - !(cpi->ref_frame_flags & flag_list[ref_frame[1]])) - return 0; - - return 1; -} - -// Function to decide if we can skip the global motion parameter computation -// for a particular ref frame -static INLINE int skip_gm_frame(AV1_COMMON *const cm, int ref_frame) { - if ((ref_frame == LAST3_FRAME || ref_frame == LAST2_FRAME) && - cm->global_motion[GOLDEN_FRAME].wmtype != IDENTITY) { - return get_relative_dist( - cm, cm->cur_frame->ref_frame_offset[ref_frame - LAST_FRAME], - cm->cur_frame->ref_frame_offset[GOLDEN_FRAME - LAST_FRAME]) <= 0; - } - return 0; -} - -static void set_default_interp_skip_flags(AV1_COMP *cpi) { - const int num_planes = av1_num_planes(&cpi->common); - cpi->default_interp_skip_flags = (num_planes == 1) - ? 
DEFAULT_LUMA_INTERP_SKIP_FLAG - : DEFAULT_INTERP_SKIP_FLAG; -} - -static void encode_frame_internal(AV1_COMP *cpi) { - ThreadData *const td = &cpi->td; - MACROBLOCK *const x = &td->mb; - AV1_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &x->e_mbd; - RD_COUNTS *const rdc = &cpi->td.rd_counts; - int i; - - x->min_partition_size = AOMMIN(x->min_partition_size, cm->seq_params.sb_size); - x->max_partition_size = AOMMIN(x->max_partition_size, cm->seq_params.sb_size); -#if CONFIG_DIST_8X8 - x->using_dist_8x8 = cpi->oxcf.using_dist_8x8; - x->tune_metric = cpi->oxcf.tuning; -#endif - cm->setup_mi(cm); - - xd->mi = cm->mi_grid_visible; - xd->mi[0] = cm->mi; - - av1_zero(*td->counts); - av1_zero(rdc->comp_pred_diff); - - if (frame_is_intra_only(cm)) { - if (cm->seq_params.force_screen_content_tools == 2) { - cm->allow_screen_content_tools = - cpi->oxcf.content == AOM_CONTENT_SCREEN || - is_screen_content(cpi->source->y_buffer, - cpi->source->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd, - cpi->source->y_stride, cpi->source->y_width, - cpi->source->y_height); - } else { - cm->allow_screen_content_tools = - cm->seq_params.force_screen_content_tools; - } - } - - // Allow intrabc when screen content tools are enabled. - cm->allow_intrabc = cm->allow_screen_content_tools; - // Reset the flag. 
- cpi->intrabc_used = 0; - // Need to disable intrabc when superres is selected - if (av1_superres_scaled(cm)) { - cm->allow_intrabc = 0; - } - - if (cpi->oxcf.pass != 1 && av1_use_hash_me(cm)) { - // add to hash table - const int pic_width = cpi->source->y_crop_width; - const int pic_height = cpi->source->y_crop_height; - uint32_t *block_hash_values[2][2]; - int8_t *is_block_same[2][3]; - int k, j; - - for (k = 0; k < 2; k++) { - for (j = 0; j < 2; j++) { - CHECK_MEM_ERROR(cm, block_hash_values[k][j], - aom_malloc(sizeof(uint32_t) * pic_width * pic_height)); - } - - for (j = 0; j < 3; j++) { - CHECK_MEM_ERROR(cm, is_block_same[k][j], - aom_malloc(sizeof(int8_t) * pic_width * pic_height)); - } - } - - av1_hash_table_create(&cm->cur_frame->hash_table); - av1_generate_block_2x2_hash_value(cpi->source, block_hash_values[0], - is_block_same[0], &cpi->td.mb); - av1_generate_block_hash_value(cpi->source, 4, block_hash_values[0], - block_hash_values[1], is_block_same[0], - is_block_same[1], &cpi->td.mb); - av1_add_to_hash_map_by_row_with_precal_data( - &cm->cur_frame->hash_table, block_hash_values[1], is_block_same[1][2], - pic_width, pic_height, 4); - av1_generate_block_hash_value(cpi->source, 8, block_hash_values[1], - block_hash_values[0], is_block_same[1], - is_block_same[0], &cpi->td.mb); - av1_add_to_hash_map_by_row_with_precal_data( - &cm->cur_frame->hash_table, block_hash_values[0], is_block_same[0][2], - pic_width, pic_height, 8); - av1_generate_block_hash_value(cpi->source, 16, block_hash_values[0], - block_hash_values[1], is_block_same[0], - is_block_same[1], &cpi->td.mb); - av1_add_to_hash_map_by_row_with_precal_data( - &cm->cur_frame->hash_table, block_hash_values[1], is_block_same[1][2], - pic_width, pic_height, 16); - av1_generate_block_hash_value(cpi->source, 32, block_hash_values[1], - block_hash_values[0], is_block_same[1], - is_block_same[0], &cpi->td.mb); - av1_add_to_hash_map_by_row_with_precal_data( - &cm->cur_frame->hash_table, block_hash_values[0], 
is_block_same[0][2], - pic_width, pic_height, 32); - av1_generate_block_hash_value(cpi->source, 64, block_hash_values[0], - block_hash_values[1], is_block_same[0], - is_block_same[1], &cpi->td.mb); - av1_add_to_hash_map_by_row_with_precal_data( - &cm->cur_frame->hash_table, block_hash_values[1], is_block_same[1][2], - pic_width, pic_height, 64); - - av1_generate_block_hash_value(cpi->source, 128, block_hash_values[1], - block_hash_values[0], is_block_same[1], - is_block_same[0], &cpi->td.mb); - av1_add_to_hash_map_by_row_with_precal_data( - &cm->cur_frame->hash_table, block_hash_values[0], is_block_same[0][2], - pic_width, pic_height, 128); - - for (k = 0; k < 2; k++) { - for (j = 0; j < 2; j++) { - aom_free(block_hash_values[k][j]); - } - - for (j = 0; j < 3; j++) { - aom_free(is_block_same[k][j]); - } - } - } - - for (i = 0; i < MAX_SEGMENTS; ++i) { - const int qindex = cm->seg.enabled - ? av1_get_qindex(&cm->seg, i, cm->base_qindex) - : cm->base_qindex; - xd->lossless[i] = qindex == 0 && cm->y_dc_delta_q == 0 && - cm->u_dc_delta_q == 0 && cm->u_ac_delta_q == 0 && - cm->v_dc_delta_q == 0 && cm->v_ac_delta_q == 0; - if (xd->lossless[i]) cpi->has_lossless_segment = 1; - xd->qindex[i] = qindex; - if (xd->lossless[i]) { - cpi->optimize_seg_arr[i] = 0; - } else { - cpi->optimize_seg_arr[i] = cpi->optimize_speed_feature; - } - } - cm->coded_lossless = is_coded_lossless(cm, xd); - cm->all_lossless = cm->coded_lossless && !av1_superres_scaled(cm); - - cm->tx_mode = select_tx_mode(cpi); - - // Fix delta q resolution for the moment - cm->delta_q_res = DEFAULT_DELTA_Q_RES; - // Set delta_q_present_flag before it is used for the first time - cm->delta_lf_res = DEFAULT_DELTA_LF_RES; - cm->delta_q_present_flag = cpi->oxcf.deltaq_mode != NO_DELTA_Q; - cm->delta_lf_present_flag = cpi->oxcf.deltaq_mode == DELTA_Q_LF; - cm->delta_lf_multi = DEFAULT_DELTA_LF_MULTI; - // update delta_q_present_flag and delta_lf_present_flag based on base_qindex - cm->delta_q_present_flag &= 
cm->base_qindex > 0; - cm->delta_lf_present_flag &= cm->base_qindex > 0; - - av1_frame_init_quantizer(cpi); - - av1_initialize_rd_consts(cpi); - av1_initialize_me_consts(cpi, x, cm->base_qindex); - init_encode_frame_mb_context(cpi); - set_default_interp_skip_flags(cpi); - if (cm->prev_frame) - cm->last_frame_seg_map = cm->prev_frame->seg_map; - else - cm->last_frame_seg_map = NULL; - cm->current_frame_seg_map = cm->cur_frame->seg_map; - if (cm->allow_intrabc || cm->coded_lossless) { - av1_set_default_ref_deltas(cm->lf.ref_deltas); - av1_set_default_mode_deltas(cm->lf.mode_deltas); - } else if (cm->prev_frame) { - memcpy(cm->lf.ref_deltas, cm->prev_frame->ref_deltas, REF_FRAMES); - memcpy(cm->lf.mode_deltas, cm->prev_frame->mode_deltas, MAX_MODE_LF_DELTAS); - } - memcpy(cm->cur_frame->ref_deltas, cm->lf.ref_deltas, REF_FRAMES); - memcpy(cm->cur_frame->mode_deltas, cm->lf.mode_deltas, MAX_MODE_LF_DELTAS); - - // Special case: set prev_mi to NULL when the previous mode info - // context cannot be used. - cm->prev_mi = cm->allow_ref_frame_mvs ? 
cm->prev_mip : NULL; - - x->txb_split_count = 0; - - av1_zero(rdc->global_motion_used); - av1_zero(cpi->gmparams_cost); -#if !CONFIG_GLOBAL_MOTION_SEARCH - cpi->global_motion_search_done = 1; -#endif // !CONFIG_GLOBAL_MOTION_SEARCH - if (cpi->common.frame_type == INTER_FRAME && cpi->source && - !cpi->global_motion_search_done) { - YV12_BUFFER_CONFIG *ref_buf[REF_FRAMES]; - int frame; - double params_by_motion[RANSAC_NUM_MOTIONS * (MAX_PARAMDIM - 1)]; - const double *params_this_motion; - int inliers_by_motion[RANSAC_NUM_MOTIONS]; - WarpedMotionParams tmp_wm_params; - static const double kIdentityParams[MAX_PARAMDIM - 1] = { - 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0 - }; - int num_refs_using_gm = 0; - - for (frame = ALTREF_FRAME; frame >= LAST_FRAME; --frame) { - ref_buf[frame] = get_ref_frame_buffer(cpi, frame); - int pframe; - cm->global_motion[frame] = default_warp_params; - const WarpedMotionParams *ref_params = - cm->prev_frame ? &cm->prev_frame->global_motion[frame] - : &default_warp_params; - // check for duplicate buffer - for (pframe = ALTREF_FRAME; pframe > frame; --pframe) { - if (ref_buf[frame] == ref_buf[pframe]) break; - } - if (pframe > frame) { - memcpy(&cm->global_motion[frame], &cm->global_motion[pframe], - sizeof(WarpedMotionParams)); - } else if (ref_buf[frame] && - ref_buf[frame]->y_crop_width == cpi->source->y_crop_width && - ref_buf[frame]->y_crop_height == cpi->source->y_crop_height && - do_gm_search_logic(&cpi->sf, num_refs_using_gm, frame) && - !(cpi->sf.selective_ref_gm && skip_gm_frame(cm, frame))) { - TransformationType model; - const int64_t ref_frame_error = - av1_frame_error(xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd, - ref_buf[frame]->y_buffer, ref_buf[frame]->y_stride, - cpi->source->y_buffer, cpi->source->y_width, - cpi->source->y_height, cpi->source->y_stride); - - if (ref_frame_error == 0) continue; - - aom_clear_system_state(); - for (model = ROTZOOM; model < GLOBAL_TRANS_TYPES_ENC; ++model) { - int64_t best_warp_error = 
INT64_MAX; - // Initially set all params to identity. - for (i = 0; i < RANSAC_NUM_MOTIONS; ++i) { - memcpy(params_by_motion + (MAX_PARAMDIM - 1) * i, kIdentityParams, - (MAX_PARAMDIM - 1) * sizeof(*params_by_motion)); - } - - compute_global_motion_feature_based( - model, cpi->source, ref_buf[frame], - cpi->common.seq_params.bit_depth, inliers_by_motion, - params_by_motion, RANSAC_NUM_MOTIONS); - - for (i = 0; i < RANSAC_NUM_MOTIONS; ++i) { - if (inliers_by_motion[i] == 0) continue; - - params_this_motion = params_by_motion + (MAX_PARAMDIM - 1) * i; - convert_model_to_params(params_this_motion, &tmp_wm_params); - - if (tmp_wm_params.wmtype != IDENTITY) { - const int64_t warp_error = refine_integerized_param( - &tmp_wm_params, tmp_wm_params.wmtype, - xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, xd->bd, - ref_buf[frame]->y_buffer, ref_buf[frame]->y_width, - ref_buf[frame]->y_height, ref_buf[frame]->y_stride, - cpi->source->y_buffer, cpi->source->y_width, - cpi->source->y_height, cpi->source->y_stride, 5, - best_warp_error); - if (warp_error < best_warp_error) { - best_warp_error = warp_error; - // Save the wm_params modified by refine_integerized_param() - // rather than motion index to avoid rerunning refine() below. - memcpy(&(cm->global_motion[frame]), &tmp_wm_params, - sizeof(WarpedMotionParams)); - } - } - } - if (cm->global_motion[frame].wmtype <= AFFINE) - if (!get_shear_params(&cm->global_motion[frame])) - cm->global_motion[frame] = default_warp_params; - - if (cm->global_motion[frame].wmtype == TRANSLATION) { - cm->global_motion[frame].wmmat[0] = - convert_to_trans_prec(cm->allow_high_precision_mv, - cm->global_motion[frame].wmmat[0]) * - GM_TRANS_ONLY_DECODE_FACTOR; - cm->global_motion[frame].wmmat[1] = - convert_to_trans_prec(cm->allow_high_precision_mv, - cm->global_motion[frame].wmmat[1]) * - GM_TRANS_ONLY_DECODE_FACTOR; - } - - // If the best error advantage found doesn't meet the threshold for - // this motion type, revert to IDENTITY. 
- if (!is_enough_erroradvantage( - (double)best_warp_error / ref_frame_error, - gm_get_params_cost(&cm->global_motion[frame], ref_params, - cm->allow_high_precision_mv), - cpi->sf.gm_erroradv_type)) { - cm->global_motion[frame] = default_warp_params; - } - if (cm->global_motion[frame].wmtype != IDENTITY) break; - } - aom_clear_system_state(); - } - if (cm->global_motion[frame].wmtype != IDENTITY) num_refs_using_gm++; - cpi->gmparams_cost[frame] = - gm_get_params_cost(&cm->global_motion[frame], ref_params, - cm->allow_high_precision_mv) + - cpi->gmtype_cost[cm->global_motion[frame].wmtype] - - cpi->gmtype_cost[IDENTITY]; - } - // clear disabled ref_frames - for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) { - const int ref_disabled = - !(cpi->ref_frame_flags & ref_frame_flag_list[frame]); - if (ref_disabled && cpi->sf.recode_loop != DISALLOW_RECODE) { - cpi->gmparams_cost[frame] = 0; - cm->global_motion[frame] = default_warp_params; - } - } - cpi->global_motion_search_done = 1; - } - memcpy(cm->cur_frame->global_motion, cm->global_motion, - REF_FRAMES * sizeof(WarpedMotionParams)); - - av1_setup_motion_field(cm); - - cpi->all_one_sided_refs = - frame_is_intra_only(cm) ? 0 : av1_refs_are_one_sided(cm); - - cm->skip_mode_flag = check_skip_mode_enabled(cpi); - - { - struct aom_usec_timer emr_timer; - aom_usec_timer_start(&emr_timer); - -#if CONFIG_FP_MB_STATS - if (cpi->use_fp_mb_stats) { - input_fpmb_stats(&cpi->twopass.firstpass_mb_stats, cm, - &cpi->twopass.this_frame_mb_stats); - } -#endif - - if (cpi->row_mt && (cpi->oxcf.max_threads > 1)) - av1_encode_tiles_mt(cpi); - else if (AOMMIN(cpi->oxcf.max_threads, cm->tile_cols * cm->tile_rows) > 1) - av1_encode_tiles_mt(cpi); - else - encode_tiles(cpi); - - aom_usec_timer_mark(&emr_timer); - cpi->time_encode_sb_row += aom_usec_timer_elapsed(&emr_timer); - } - - // If intrabc is allowed but never selected, reset the allow_intrabc flag. 
- if (cm->allow_intrabc && !cpi->intrabc_used) cm->allow_intrabc = 0; - if (cm->allow_intrabc) cm->delta_lf_present_flag = 0; -} - -void av1_encode_frame(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - // Indicates whether or not to use a default reduced set for ext-tx - // rather than the potential full set of 16 transforms - cm->reduced_tx_set_used = 0; - - if (cm->show_frame == 0) { - int arf_offset = AOMMIN( - (MAX_GF_INTERVAL - 1), - cpi->twopass.gf_group.arf_src_offset[cpi->twopass.gf_group.index]); - int brf_offset = - cpi->twopass.gf_group.brf_src_offset[cpi->twopass.gf_group.index]; - arf_offset = AOMMIN((MAX_GF_INTERVAL - 1), arf_offset + brf_offset); - cm->frame_offset = cm->current_video_frame + arf_offset; - } else { - cm->frame_offset = cm->current_video_frame; - } - cm->frame_offset %= (1 << (cm->seq_params.order_hint_bits_minus_1 + 1)); - - // Make sure segment_id is no larger than last_active_segid. - if (cm->seg.enabled && cm->seg.update_map) { - const int mi_rows = cm->mi_rows; - const int mi_cols = cm->mi_cols; - const int last_active_segid = cm->seg.last_active_segid; - uint8_t *map = cpi->segmentation_map; - for (int mi_row = 0; mi_row < mi_rows; ++mi_row) { - for (int mi_col = 0; mi_col < mi_cols; ++mi_col) { - map[mi_col] = AOMMIN(map[mi_col], last_active_segid); - } - map += mi_cols; - } - } - - av1_setup_frame_buf_refs(cm); - if (cpi->sf.selective_ref_frame >= 2) enforce_max_ref_frames(cpi); - av1_setup_frame_sign_bias(cm); - -#if CONFIG_MISMATCH_DEBUG - mismatch_reset_frame(num_planes); -#else - (void)num_planes; -#endif - - cpi->allow_comp_inter_inter = !frame_is_intra_only(cm); - - if (cpi->sf.frame_parameter_update) { - int i; - RD_OPT *const rd_opt = &cpi->rd; - RD_COUNTS *const rdc = &cpi->td.rd_counts; - - // This code does a single RD pass over the whole frame assuming - // either compound, single or hybrid prediction as per whatever has - // worked best for that type of frame 
in the past. - // It also predicts whether another coding mode would have worked - // better than this coding mode. If that is the case, it remembers - // that for subsequent frames. - // It does the same analysis for transform size selection also. - // - // TODO(zoeliu): To investigate whether a frame_type other than - // INTRA/ALTREF/GOLDEN/LAST needs to be specified seperately. - const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi); - int64_t *const mode_thrs = rd_opt->prediction_type_threshes[frame_type]; - const int is_alt_ref = frame_type == ALTREF_FRAME; - - /* prediction (compound, single or hybrid) mode selection */ - // NOTE: "is_alt_ref" is true only for OVERLAY/INTNL_OVERLAY frames - if (is_alt_ref || !cpi->allow_comp_inter_inter) - cm->reference_mode = SINGLE_REFERENCE; - else - cm->reference_mode = REFERENCE_MODE_SELECT; - - cm->interp_filter = SWITCHABLE; - if (cm->large_scale_tile) cm->interp_filter = EIGHTTAP_REGULAR; - - cm->switchable_motion_mode = 1; - - rdc->compound_ref_used_flag = 0; - rdc->skip_mode_used_flag = 0; - - encode_frame_internal(cpi); - - for (i = 0; i < REFERENCE_MODES; ++i) - mode_thrs[i] = (mode_thrs[i] + rdc->comp_pred_diff[i] / cm->MBs) / 2; - - if (cm->reference_mode == REFERENCE_MODE_SELECT) { - // Use a flag that includes 4x4 blocks - if (rdc->compound_ref_used_flag == 0) { - cm->reference_mode = SINGLE_REFERENCE; -#if CONFIG_ENTROPY_STATS - av1_zero(cpi->td.counts->comp_inter); -#endif // CONFIG_ENTROPY_STATS - } - } - // Re-check on the skip mode status as reference mode may have been changed. 
- if (frame_is_intra_only(cm) || cm->reference_mode == SINGLE_REFERENCE) { - cm->is_skip_mode_allowed = 0; - cm->skip_mode_flag = 0; - } - if (cm->skip_mode_flag && rdc->skip_mode_used_flag == 0) - cm->skip_mode_flag = 0; - - if (!cm->large_scale_tile) { - if (cm->tx_mode == TX_MODE_SELECT && cpi->td.mb.txb_split_count == 0) - cm->tx_mode = TX_MODE_LARGEST; - } - } else { - encode_frame_internal(cpi); - } -} - -static void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd, - FRAME_COUNTS *counts, TX_SIZE tx_size, int depth, - int blk_row, int blk_col, - uint8_t allow_update_cdf) { - MB_MODE_INFO *mbmi = xd->mi[0]; - const BLOCK_SIZE bsize = mbmi->sb_type; - const int max_blocks_high = max_block_high(xd, bsize, 0); - const int max_blocks_wide = max_block_wide(xd, bsize, 0); - int ctx = txfm_partition_context(xd->above_txfm_context + blk_col, - xd->left_txfm_context + blk_row, - mbmi->sb_type, tx_size); - const int txb_size_index = av1_get_txb_size_index(bsize, blk_row, blk_col); - const TX_SIZE plane_tx_size = mbmi->inter_tx_size[txb_size_index]; - - if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return; - assert(tx_size > TX_4X4); - - if (depth == MAX_VARTX_DEPTH) { - // Don't add to counts in this case - mbmi->tx_size = tx_size; - txfm_partition_update(xd->above_txfm_context + blk_col, - xd->left_txfm_context + blk_row, tx_size, tx_size); - return; - } - - if (tx_size == plane_tx_size) { -#if CONFIG_ENTROPY_STATS - ++counts->txfm_partition[ctx][0]; -#endif - if (allow_update_cdf) - update_cdf(xd->tile_ctx->txfm_partition_cdf[ctx], 0, 2); - mbmi->tx_size = tx_size; - txfm_partition_update(xd->above_txfm_context + blk_col, - xd->left_txfm_context + blk_row, tx_size, tx_size); - } else { - const TX_SIZE sub_txs = sub_tx_size_map[tx_size]; - const int bsw = tx_size_wide_unit[sub_txs]; - const int bsh = tx_size_high_unit[sub_txs]; - -#if CONFIG_ENTROPY_STATS - ++counts->txfm_partition[ctx][1]; -#endif - if (allow_update_cdf) - 
update_cdf(xd->tile_ctx->txfm_partition_cdf[ctx], 1, 2); - ++x->txb_split_count; - - if (sub_txs == TX_4X4) { - mbmi->inter_tx_size[txb_size_index] = TX_4X4; - mbmi->tx_size = TX_4X4; - txfm_partition_update(xd->above_txfm_context + blk_col, - xd->left_txfm_context + blk_row, TX_4X4, tx_size); - return; - } - - for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) { - for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) { - int offsetr = row; - int offsetc = col; - - update_txfm_count(x, xd, counts, sub_txs, depth + 1, blk_row + offsetr, - blk_col + offsetc, allow_update_cdf); - } - } - } -} - -static void tx_partition_count_update(const AV1_COMMON *const cm, MACROBLOCK *x, - BLOCK_SIZE plane_bsize, int mi_row, - int mi_col, FRAME_COUNTS *td_counts, - uint8_t allow_update_cdf) { - MACROBLOCKD *xd = &x->e_mbd; - const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0]; - const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0]; - const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, 0); - const int bh = tx_size_high_unit[max_tx_size]; - const int bw = tx_size_wide_unit[max_tx_size]; - int idx, idy; - - xd->above_txfm_context = cm->above_txfm_context[xd->tile.tile_row] + mi_col; - xd->left_txfm_context = - xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); - - for (idy = 0; idy < mi_height; idy += bh) - for (idx = 0; idx < mi_width; idx += bw) - update_txfm_count(x, xd, td_counts, max_tx_size, 0, idy, idx, - allow_update_cdf); -} - -static void set_txfm_context(MACROBLOCKD *xd, TX_SIZE tx_size, int blk_row, - int blk_col) { - MB_MODE_INFO *mbmi = xd->mi[0]; - const BLOCK_SIZE bsize = mbmi->sb_type; - const int max_blocks_high = max_block_high(xd, bsize, 0); - const int max_blocks_wide = max_block_wide(xd, bsize, 0); - const int txb_size_index = av1_get_txb_size_index(bsize, blk_row, blk_col); - const TX_SIZE plane_tx_size = mbmi->inter_tx_size[txb_size_index]; - - if (blk_row >= max_blocks_high 
|| blk_col >= max_blocks_wide) return; - - if (tx_size == plane_tx_size) { - mbmi->tx_size = tx_size; - txfm_partition_update(xd->above_txfm_context + blk_col, - xd->left_txfm_context + blk_row, tx_size, tx_size); - - } else { - if (tx_size == TX_8X8) { - mbmi->inter_tx_size[txb_size_index] = TX_4X4; - mbmi->tx_size = TX_4X4; - txfm_partition_update(xd->above_txfm_context + blk_col, - xd->left_txfm_context + blk_row, TX_4X4, tx_size); - return; - } - const TX_SIZE sub_txs = sub_tx_size_map[tx_size]; - const int bsw = tx_size_wide_unit[sub_txs]; - const int bsh = tx_size_high_unit[sub_txs]; - for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) { - for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) { - const int offsetr = blk_row + row; - const int offsetc = blk_col + col; - if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue; - set_txfm_context(xd, sub_txs, offsetr, offsetc); - } - } - } -} - -static void tx_partition_set_contexts(const AV1_COMMON *const cm, - MACROBLOCKD *xd, BLOCK_SIZE plane_bsize, - int mi_row, int mi_col) { - const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0]; - const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0]; - const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, 0); - const int bh = tx_size_high_unit[max_tx_size]; - const int bw = tx_size_wide_unit[max_tx_size]; - int idx, idy; - - xd->above_txfm_context = cm->above_txfm_context[xd->tile.tile_row] + mi_col; - xd->left_txfm_context = - xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK); - - for (idy = 0; idy < mi_height; idy += bh) - for (idx = 0; idx < mi_width; idx += bw) - set_txfm_context(xd, max_tx_size, idy, idx); -} - -static void encode_superblock(const AV1_COMP *const cpi, TileDataEnc *tile_data, - ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run, - int mi_row, int mi_col, BLOCK_SIZE bsize, - int *rate) { - const AV1_COMMON *const cm = &cpi->common; - const int num_planes = 
av1_num_planes(cm); - MACROBLOCK *const x = &td->mb; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO **mi_4x4 = xd->mi; - MB_MODE_INFO *mbmi = mi_4x4[0]; - const int seg_skip = - segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP); - const int mis = cm->mi_stride; - const int mi_width = mi_size_wide[bsize]; - const int mi_height = mi_size_high[bsize]; - const int is_inter = is_inter_block(mbmi); - - if (cpi->sf.mode_pruning_based_on_two_pass_partition_search && - x->cb_partition_scan) { - for (int row = mi_row; row < mi_row + mi_width; - row += FIRST_PARTITION_PASS_SAMPLE_REGION) { - for (int col = mi_col; col < mi_col + mi_height; - col += FIRST_PARTITION_PASS_SAMPLE_REGION) { - const int index = av1_first_partition_pass_stats_index(row, col); - FIRST_PARTITION_PASS_STATS *const stats = - &x->first_partition_pass_stats[index]; - // Increase the counter of data samples. - ++stats->sample_counts; - // Increase the counter for ref_frame[0] and ref_frame[1]. - if (stats->ref0_counts[mbmi->ref_frame[0]] < 255) - ++stats->ref0_counts[mbmi->ref_frame[0]]; - if (mbmi->ref_frame[1] >= 0 && - stats->ref1_counts[mbmi->ref_frame[0]] < 255) - ++stats->ref1_counts[mbmi->ref_frame[1]]; - } - } - } - - if (!is_inter) { - xd->cfl.is_chroma_reference = - is_chroma_reference(mi_row, mi_col, bsize, cm->seq_params.subsampling_x, - cm->seq_params.subsampling_y); - xd->cfl.store_y = store_cfl_required(cm, xd); - mbmi->skip = 1; - for (int plane = 0; plane < num_planes; ++plane) { - av1_encode_intra_block_plane(cpi, x, bsize, plane, - cpi->optimize_seg_arr[mbmi->segment_id], - mi_row, mi_col); - } - - // If there is at least one lossless segment, force the skip for intra - // block to be 0, in order to avoid the segment_id to be changed by in - // write_segment_id(). 
- if (!cpi->common.seg.segid_preskip && cpi->common.seg.update_map && - cpi->has_lossless_segment) - mbmi->skip = 0; - - xd->cfl.store_y = 0; - if (av1_allow_palette(cm->allow_screen_content_tools, bsize)) { - for (int plane = 0; plane < AOMMIN(2, num_planes); ++plane) { - if (mbmi->palette_mode_info.palette_size[plane] > 0) { - if (!dry_run) { - av1_tokenize_color_map(x, plane, t, bsize, mbmi->tx_size, - PALETTE_MAP, tile_data->allow_update_cdf, - td->counts); - } else if (dry_run == DRY_RUN_COSTCOEFFS) { - rate += - av1_cost_color_map(x, plane, bsize, mbmi->tx_size, PALETTE_MAP); - } - } - } - } - - av1_update_txb_context(cpi, td, dry_run, bsize, rate, mi_row, mi_col, - tile_data->allow_update_cdf); - } else { - int ref; - const int is_compound = has_second_ref(mbmi); - - set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); - for (ref = 0; ref < 1 + is_compound; ++ref) { - YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, mbmi->ref_frame[ref]); - assert(IMPLIES(!is_intrabc_block(mbmi), cfg)); - av1_setup_pre_planes(xd, ref, cfg, mi_row, mi_col, - &xd->block_refs[ref]->sf, num_planes); - } - - av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize); - if (mbmi->motion_mode == OBMC_CAUSAL) - av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col); - -#if CONFIG_MISMATCH_DEBUG - if (dry_run == OUTPUT_ENABLED) { - for (int plane = 0; plane < num_planes; ++plane) { - const struct macroblockd_plane *pd = &xd->plane[plane]; - int pixel_c, pixel_r; - mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, 0, 0, - pd->subsampling_x, pd->subsampling_y); - if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x, - pd->subsampling_y)) - continue; - mismatch_record_block_pre(pd->dst.buf, pd->dst.stride, cm->frame_offset, - plane, pixel_c, pixel_r, pd->width, - pd->height, - xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH); - } - } -#else - (void)num_planes; -#endif - - av1_encode_sb(cpi, x, bsize, mi_row, mi_col, dry_run); - 
av1_tokenize_sb_vartx(cpi, td, t, dry_run, mi_row, mi_col, bsize, rate, - tile_data->allow_update_cdf); - } - - if (!dry_run) { - if (av1_allow_intrabc(cm) && is_intrabc_block(mbmi)) - td->intrabc_used_this_tile = 1; - if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id] && - mbmi->sb_type > BLOCK_4X4 && !(is_inter && (mbmi->skip || seg_skip))) { - if (is_inter) { - tx_partition_count_update(cm, x, bsize, mi_row, mi_col, td->counts, - tile_data->allow_update_cdf); - } else { - if (mbmi->tx_size != max_txsize_rect_lookup[bsize]) - ++x->txb_split_count; - if (block_signals_txsize(bsize)) { - const int tx_size_ctx = get_tx_size_context(xd); - const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize); - const int depth = tx_size_to_depth(mbmi->tx_size, bsize); - const int max_depths = bsize_to_max_depth(bsize); - - if (tile_data->allow_update_cdf) - update_cdf(xd->tile_ctx->tx_size_cdf[tx_size_cat][tx_size_ctx], - depth, max_depths + 1); -#if CONFIG_ENTROPY_STATS - ++td->counts->intra_tx_size[tx_size_cat][tx_size_ctx][depth]; -#endif - } - } - assert(IMPLIES(is_rect_tx(mbmi->tx_size), is_rect_tx_allowed(xd, mbmi))); - } else { - int i, j; - TX_SIZE intra_tx_size; - // The new intra coding scheme requires no change of transform size - if (is_inter) { - if (xd->lossless[mbmi->segment_id]) { - intra_tx_size = TX_4X4; - } else { - intra_tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode); - } - } else { - intra_tx_size = mbmi->tx_size; - } - - for (j = 0; j < mi_height; j++) - for (i = 0; i < mi_width; i++) - if (mi_col + i < cm->mi_cols && mi_row + j < cm->mi_rows) - mi_4x4[mis * j + i]->tx_size = intra_tx_size; - - if (intra_tx_size != max_txsize_rect_lookup[bsize]) ++x->txb_split_count; - } - } - - if (cm->tx_mode == TX_MODE_SELECT && block_signals_txsize(mbmi->sb_type) && - is_inter && !(mbmi->skip || seg_skip) && - !xd->lossless[mbmi->segment_id]) { - if (dry_run) tx_partition_set_contexts(cm, xd, bsize, mi_row, mi_col); - } else { - TX_SIZE tx_size = 
mbmi->tx_size; - // The new intra coding scheme requires no change of transform size - if (is_inter) { - if (xd->lossless[mbmi->segment_id]) { - tx_size = TX_4X4; - } else { - tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode); - } - } else { - tx_size = (bsize > BLOCK_4X4) ? tx_size : TX_4X4; - } - mbmi->tx_size = tx_size; - set_txfm_ctxs(tx_size, xd->n4_w, xd->n4_h, - (mbmi->skip || seg_skip) && is_inter_block(mbmi), xd); - } - CFL_CTX *const cfl = &xd->cfl; - if (is_inter_block(mbmi) && - !is_chroma_reference(mi_row, mi_col, bsize, cfl->subsampling_x, - cfl->subsampling_y) && - is_cfl_allowed(xd)) { - cfl_store_block(xd, mbmi->sb_type, mbmi->tx_size); - } -} diff --git a/third_party/aom/av1/encoder/encodeframe.h b/third_party/aom/av1/encoder/encodeframe.h deleted file mode 100644 index e8cf9b468..000000000 --- a/third_party/aom/av1/encoder/encodeframe.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_ENCODER_ENCODEFRAME_H_ -#define AOM_AV1_ENCODER_ENCODEFRAME_H_ - -#include "aom/aom_integer.h" -#include "av1/common/blockd.h" -#include "av1/common/enums.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define DELTAQ_MODULATION 1 // 0: variance based, 1: wavelet AC energy based - -struct macroblock; -struct yv12_buffer_config; -struct AV1_COMP; -struct ThreadData; - -void av1_setup_src_planes(struct macroblock *x, - const struct yv12_buffer_config *src, int mi_row, - int mi_col, const int num_planes); - -void av1_encode_frame(struct AV1_COMP *cpi); - -void av1_alloc_tile_data(struct AV1_COMP *cpi); -void av1_init_tile_data(struct AV1_COMP *cpi); -void av1_encode_tile(struct AV1_COMP *cpi, struct ThreadData *td, int tile_row, - int tile_col); -void av1_encode_sb_row(struct AV1_COMP *cpi, struct ThreadData *td, - int tile_row, int tile_col, int mi_row); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_ENCODEFRAME_H_ diff --git a/third_party/aom/av1/encoder/encodemb.c b/third_party/aom/av1/encoder/encodemb.c deleted file mode 100644 index ad12577e6..000000000 --- a/third_party/aom/av1/encoder/encodemb.c +++ /dev/null @@ -1,649 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include "config/aom_config.h" -#include "config/av1_rtcd.h" -#include "config/aom_dsp_rtcd.h" - -#include "aom_dsp/bitwriter.h" -#include "aom_dsp/quantize.h" -#include "aom_mem/aom_mem.h" -#include "aom_ports/mem.h" - -#if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG -#include "aom_util/debug_util.h" -#endif // CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG - -#include "av1/common/cfl.h" -#include "av1/common/idct.h" -#include "av1/common/reconinter.h" -#include "av1/common/reconintra.h" -#include "av1/common/scan.h" - -#include "av1/encoder/av1_quantize.h" -#include "av1/encoder/encodemb.h" -#include "av1/encoder/encodetxb.h" -#include "av1/encoder/hybrid_fwd_txfm.h" -#include "av1/encoder/rd.h" -#include "av1/encoder/rdopt.h" - -// Check if one needs to use c version subtraction. -static int check_subtract_block_size(int w, int h) { return w < 4 || h < 4; } - -static void subtract_block(const MACROBLOCKD *xd, int rows, int cols, - int16_t *diff, ptrdiff_t diff_stride, - const uint8_t *src8, ptrdiff_t src_stride, - const uint8_t *pred8, ptrdiff_t pred_stride) { - if (check_subtract_block_size(rows, cols)) { - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - aom_highbd_subtract_block_c(rows, cols, diff, diff_stride, src8, - src_stride, pred8, pred_stride, xd->bd); - return; - } - aom_subtract_block_c(rows, cols, diff, diff_stride, src8, src_stride, pred8, - pred_stride); - - return; - } - - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - aom_highbd_subtract_block(rows, cols, diff, diff_stride, src8, src_stride, - pred8, pred_stride, xd->bd); - return; - } - aom_subtract_block(rows, cols, diff, diff_stride, src8, src_stride, pred8, - pred_stride); -} - -void av1_subtract_txb(MACROBLOCK *x, int plane, BLOCK_SIZE plane_bsize, - int blk_col, int blk_row, TX_SIZE tx_size) { - MACROBLOCKD *const xd = &x->e_mbd; - struct macroblock_plane *const p = &x->plane[plane]; - const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane]; - const int 
diff_stride = block_size_wide[plane_bsize]; - const int src_stride = p->src.stride; - const int dst_stride = pd->dst.stride; - const int tx1d_width = tx_size_wide[tx_size]; - const int tx1d_height = tx_size_high[tx_size]; - uint8_t *dst = - &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]]; - uint8_t *src = - &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]]; - int16_t *src_diff = - &p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]]; - subtract_block(xd, tx1d_height, tx1d_width, src_diff, diff_stride, src, - src_stride, dst, dst_stride); -} - -void av1_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { - struct macroblock_plane *const p = &x->plane[plane]; - const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane]; - const BLOCK_SIZE plane_bsize = - get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); - const int bw = block_size_wide[plane_bsize]; - const int bh = block_size_high[plane_bsize]; - const MACROBLOCKD *xd = &x->e_mbd; - - subtract_block(xd, bh, bw, p->src_diff, bw, p->src.buf, p->src.stride, - pd->dst.buf, pd->dst.stride); -} - -int av1_optimize_b(const struct AV1_COMP *cpi, MACROBLOCK *mb, int plane, - int block, TX_SIZE tx_size, TX_TYPE tx_type, - const TXB_CTX *const txb_ctx, int fast_mode, - int *rate_cost) { - MACROBLOCKD *const xd = &mb->e_mbd; - struct macroblock_plane *const p = &mb->plane[plane]; - const int eob = p->eobs[block]; - const int segment_id = xd->mi[0]->segment_id; - - if (eob == 0 || !cpi->optimize_seg_arr[segment_id] || - xd->lossless[segment_id]) { - *rate_cost = av1_cost_skip_txb(mb, txb_ctx, plane, tx_size); - return eob; - } - - (void)fast_mode; - return av1_optimize_txb_new(cpi, mb, plane, block, tx_size, tx_type, txb_ctx, - rate_cost, cpi->oxcf.sharpness); -} - -typedef enum QUANT_FUNC { - QUANT_FUNC_LOWBD = 0, - QUANT_FUNC_HIGHBD = 1, - QUANT_FUNC_TYPES = 2 -} QUANT_FUNC; - -static AV1_QUANT_FACADE - 
quant_func_list[AV1_XFORM_QUANT_TYPES][QUANT_FUNC_TYPES] = { - { av1_quantize_fp_facade, av1_highbd_quantize_fp_facade }, - { av1_quantize_b_facade, av1_highbd_quantize_b_facade }, - { av1_quantize_dc_facade, av1_highbd_quantize_dc_facade }, - { NULL, NULL } - }; - -void av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block, - int blk_row, int blk_col, BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, TX_TYPE tx_type, - AV1_XFORM_QUANT xform_quant_idx) { - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - const struct macroblock_plane *const p = &x->plane[plane]; - const struct macroblockd_plane *const pd = &xd->plane[plane]; - const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type); - - tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); - tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); - tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); - uint16_t *const eob = &p->eobs[block]; - const int diff_stride = block_size_wide[plane_bsize]; - int seg_id = mbmi->segment_id; - const TX_SIZE qm_tx_size = av1_get_adjusted_tx_size(tx_size); - // Use a flat matrix (i.e. no weighting) for 1D and Identity transforms - const qm_val_t *qmatrix = - IS_2D_TRANSFORM(tx_type) ? pd->seg_qmatrix[seg_id][qm_tx_size] - : cm->gqmatrix[NUM_QM_LEVELS - 1][0][qm_tx_size]; - const qm_val_t *iqmatrix = - IS_2D_TRANSFORM(tx_type) - ? 
pd->seg_iqmatrix[seg_id][qm_tx_size] - : cm->giqmatrix[NUM_QM_LEVELS - 1][0][qm_tx_size]; - - const int src_offset = (blk_row * diff_stride + blk_col); - const int16_t *src_diff = &p->src_diff[src_offset << tx_size_wide_log2[0]]; - QUANT_PARAM qparam; - qparam.log_scale = av1_get_tx_scale(tx_size); - qparam.tx_size = tx_size; - qparam.qmatrix = qmatrix; - qparam.iqmatrix = iqmatrix; - TxfmParam txfm_param; - txfm_param.tx_type = tx_type; - txfm_param.tx_size = tx_size; - txfm_param.lossless = xd->lossless[mbmi->segment_id]; - txfm_param.tx_set_type = av1_get_ext_tx_set_type( - txfm_param.tx_size, is_inter_block(mbmi), cm->reduced_tx_set_used); - - txfm_param.bd = xd->bd; - txfm_param.is_hbd = get_bitdepth_data_path_index(xd); - - av1_fwd_txfm(src_diff, coeff, diff_stride, &txfm_param); - - if (xform_quant_idx != AV1_XFORM_QUANT_SKIP_QUANT) { - const int n_coeffs = av1_get_max_eob(tx_size); - if (LIKELY(!x->skip_block)) { - quant_func_list[xform_quant_idx][txfm_param.is_hbd]( - coeff, n_coeffs, p, qcoeff, dqcoeff, eob, scan_order, &qparam); - } else { - av1_quantize_skip(n_coeffs, qcoeff, dqcoeff, eob); - } - } - // NOTE: optimize_b_following is ture means av1_optimze_b will be called - // When the condition of doing optimize_b is changed, - // this flag need update simultaneously - const int optimize_b_following = - (xform_quant_idx != AV1_XFORM_QUANT_FP) || (txfm_param.lossless); - if (optimize_b_following) { - p->txb_entropy_ctx[block] = - (uint8_t)av1_get_txb_entropy_context(qcoeff, scan_order, *eob); - } else { - p->txb_entropy_ctx[block] = 0; - } - return; -} - -static void encode_block(int plane, int block, int blk_row, int blk_col, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg, - int mi_row, int mi_col, RUN_TYPE dry_run) { - (void)mi_row; - (void)mi_col; - (void)dry_run; - struct encode_b_args *const args = arg; - const AV1_COMMON *const cm = &args->cpi->common; - MACROBLOCK *const x = args->x; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *mbmi 
= xd->mi[0]; - struct macroblock_plane *const p = &x->plane[plane]; - struct macroblockd_plane *const pd = &xd->plane[plane]; - tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); - uint8_t *dst; - ENTROPY_CONTEXT *a, *l; - int dummy_rate_cost = 0; - - const int bw = block_size_wide[plane_bsize] >> tx_size_wide_log2[0]; - dst = &pd->dst - .buf[(blk_row * pd->dst.stride + blk_col) << tx_size_wide_log2[0]]; - - a = &args->ta[blk_col]; - l = &args->tl[blk_row]; - - if (!is_blk_skip(x, plane, blk_row * bw + blk_col) && !mbmi->skip_mode) { - TX_TYPE tx_type = av1_get_tx_type(pd->plane_type, xd, blk_row, blk_col, - tx_size, cm->reduced_tx_set_used); - if (args->enable_optimize_b) { - av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, - tx_size, tx_type, AV1_XFORM_QUANT_FP); - TXB_CTX txb_ctx; - get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx); - av1_optimize_b(args->cpi, x, plane, block, tx_size, tx_type, &txb_ctx, 1, - &dummy_rate_cost); - } else { - av1_xform_quant( - cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, tx_type, - USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP); - } - } else { - p->eobs[block] = 0; - p->txb_entropy_ctx[block] = 0; - } - - av1_set_txb_context(x, plane, block, tx_size, a, l); - - if (p->eobs[block]) { - *(args->skip) = 0; - - TX_TYPE tx_type = av1_get_tx_type(pd->plane_type, xd, blk_row, blk_col, - tx_size, cm->reduced_tx_set_used); - av1_inverse_transform_block(xd, dqcoeff, plane, tx_type, tx_size, dst, - pd->dst.stride, p->eobs[block], - cm->reduced_tx_set_used); - } - - if (p->eobs[block] == 0 && plane == 0) { - // TODO(debargha, jingning): Temporarily disable txk_type check for eob=0 - // case. It is possible that certain collision in hash index would cause - // the assertion failure. To further optimize the rate-distortion - // performance, we need to re-visit this part and enable this assert - // again. 
-#if 0 - if (args->cpi->oxcf.aq_mode == NO_AQ && - args->cpi->oxcf.deltaq_mode == NO_DELTA_Q) { - // TODO(jingning,angiebird,huisu@google.com): enable txk_check when - // enable_optimize_b is true to detect potential RD bug. - const uint8_t disable_txk_check = args->enable_optimize_b; - if (!disable_txk_check) { - assert(mbmi->txk_type[av1_get_txk_type_index(plane_bsize, blk_row, - blk_col)] == DCT_DCT); - } - } -#endif - update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size, - DCT_DCT); - } - -#if CONFIG_MISMATCH_DEBUG - if (dry_run == OUTPUT_ENABLED) { - int pixel_c, pixel_r; - BLOCK_SIZE bsize = txsize_to_bsize[tx_size]; - int blk_w = block_size_wide[bsize]; - int blk_h = block_size_high[bsize]; - mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, blk_col, blk_row, - pd->subsampling_x, pd->subsampling_y); - mismatch_record_block_tx(dst, pd->dst.stride, cm->frame_offset, plane, - pixel_c, pixel_r, blk_w, blk_h, - xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH); - } -#endif -} - -static void encode_block_inter(int plane, int block, int blk_row, int blk_col, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size, - void *arg, int mi_row, int mi_col, - RUN_TYPE dry_run) { - (void)mi_row; - (void)mi_col; - struct encode_b_args *const args = arg; - MACROBLOCK *const x = args->x; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - const struct macroblockd_plane *const pd = &xd->plane[plane]; - const int max_blocks_high = max_block_high(xd, plane_bsize, plane); - const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane); - - if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return; - - const TX_SIZE plane_tx_size = - plane ? 
av1_get_max_uv_txsize(mbmi->sb_type, pd->subsampling_x, - pd->subsampling_y) - : mbmi->inter_tx_size[av1_get_txb_size_index(plane_bsize, blk_row, - blk_col)]; - if (!plane) { - assert(tx_size_wide[tx_size] >= tx_size_wide[plane_tx_size] && - tx_size_high[tx_size] >= tx_size_high[plane_tx_size]); - } - - if (tx_size == plane_tx_size || plane) { - encode_block(plane, block, blk_row, blk_col, plane_bsize, tx_size, arg, - mi_row, mi_col, dry_run); - } else { - assert(tx_size < TX_SIZES_ALL); - const TX_SIZE sub_txs = sub_tx_size_map[tx_size]; - assert(IMPLIES(tx_size <= TX_4X4, sub_txs == tx_size)); - assert(IMPLIES(tx_size > TX_4X4, sub_txs < tx_size)); - // This is the square transform block partition entry point. - const int bsw = tx_size_wide_unit[sub_txs]; - const int bsh = tx_size_high_unit[sub_txs]; - const int step = bsh * bsw; - assert(bsw > 0 && bsh > 0); - - for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) { - for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) { - const int offsetr = blk_row + row; - const int offsetc = blk_col + col; - - if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue; - - encode_block_inter(plane, block, offsetr, offsetc, plane_bsize, sub_txs, - arg, mi_row, mi_col, dry_run); - block += step; - } - } - } -} - -void av1_foreach_transformed_block_in_plane( - const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane, - foreach_transformed_block_visitor visit, void *arg) { - const struct macroblockd_plane *const pd = &xd->plane[plane]; - // block and transform sizes, in number of 4x4 blocks log 2 ("*_b") - // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8 - // transform size varies per plane, look it up in a common way. 
- const TX_SIZE tx_size = av1_get_tx_size(plane, xd); - const BLOCK_SIZE plane_bsize = - get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); - const uint8_t txw_unit = tx_size_wide_unit[tx_size]; - const uint8_t txh_unit = tx_size_high_unit[tx_size]; - const int step = txw_unit * txh_unit; - int i = 0, r, c; - - // If mb_to_right_edge is < 0 we are in a situation in which - // the current block size extends into the UMV and we won't - // visit the sub blocks that are wholly within the UMV. - const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane); - const int max_blocks_high = max_block_high(xd, plane_bsize, plane); - - int blk_row, blk_col; - - const BLOCK_SIZE max_unit_bsize = - get_plane_block_size(BLOCK_64X64, pd->subsampling_x, pd->subsampling_y); - int mu_blocks_wide = block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0]; - int mu_blocks_high = block_size_high[max_unit_bsize] >> tx_size_high_log2[0]; - mu_blocks_wide = AOMMIN(max_blocks_wide, mu_blocks_wide); - mu_blocks_high = AOMMIN(max_blocks_high, mu_blocks_high); - - // Keep track of the row and column of the blocks we use so that we know - // if we are in the unrestricted motion border. - for (r = 0; r < max_blocks_high; r += mu_blocks_high) { - const int unit_height = AOMMIN(mu_blocks_high + r, max_blocks_high); - // Skip visiting the sub blocks that are wholly within the UMV. 
- for (c = 0; c < max_blocks_wide; c += mu_blocks_wide) { - const int unit_width = AOMMIN(mu_blocks_wide + c, max_blocks_wide); - for (blk_row = r; blk_row < unit_height; blk_row += txh_unit) { - for (blk_col = c; blk_col < unit_width; blk_col += txw_unit) { - visit(plane, i, blk_row, blk_col, plane_bsize, tx_size, arg); - i += step; - } - } - } - } -} - -void av1_foreach_transformed_block(const MACROBLOCKD *const xd, - BLOCK_SIZE bsize, int mi_row, int mi_col, - foreach_transformed_block_visitor visit, - void *arg, const int num_planes) { - for (int plane = 0; plane < num_planes; ++plane) { - if (!is_chroma_reference(mi_row, mi_col, bsize, - xd->plane[plane].subsampling_x, - xd->plane[plane].subsampling_y)) - continue; - av1_foreach_transformed_block_in_plane(xd, bsize, plane, visit, arg); - } -} - -typedef struct encode_block_pass1_args { - AV1_COMMON *cm; - MACROBLOCK *x; -} encode_block_pass1_args; - -static void encode_block_pass1(int plane, int block, int blk_row, int blk_col, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size, - void *arg) { - encode_block_pass1_args *args = (encode_block_pass1_args *)arg; - AV1_COMMON *cm = args->cm; - MACROBLOCK *const x = args->x; - MACROBLOCKD *const xd = &x->e_mbd; - struct macroblock_plane *const p = &x->plane[plane]; - struct macroblockd_plane *const pd = &xd->plane[plane]; - tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); - TxfmParam txfm_param; - uint8_t *dst; - dst = &pd->dst - .buf[(blk_row * pd->dst.stride + blk_col) << tx_size_wide_log2[0]]; - av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, - DCT_DCT, AV1_XFORM_QUANT_B); - - if (p->eobs[block] > 0) { - txfm_param.bd = xd->bd; - txfm_param.is_hbd = get_bitdepth_data_path_index(xd); - txfm_param.tx_type = DCT_DCT; - txfm_param.tx_size = tx_size; - txfm_param.eob = p->eobs[block]; - txfm_param.lossless = xd->lossless[xd->mi[0]->segment_id]; - txfm_param.tx_set_type = av1_get_ext_tx_set_type( - txfm_param.tx_size, 
is_inter_block(xd->mi[0]), cm->reduced_tx_set_used); - if (txfm_param.is_hbd) { - av1_highbd_inv_txfm_add(dqcoeff, dst, pd->dst.stride, &txfm_param); - return; - } - av1_inv_txfm_add(dqcoeff, dst, pd->dst.stride, &txfm_param); - } -} - -void av1_encode_sby_pass1(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize) { - encode_block_pass1_args args = { cm, x }; - av1_subtract_plane(x, bsize, 0); - av1_foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0, - encode_block_pass1, &args); -} - -void av1_encode_sb(const struct AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, - int mi_row, int mi_col, RUN_TYPE dry_run) { - (void)dry_run; - const AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - MACROBLOCKD *const xd = &x->e_mbd; - struct optimize_ctx ctx; - MB_MODE_INFO *mbmi = xd->mi[0]; - struct encode_b_args arg = { cpi, - x, - &ctx, - &mbmi->skip, - NULL, - NULL, - cpi->optimize_seg_arr[mbmi->segment_id] }; - int plane; - - mbmi->skip = 1; - - if (x->skip) return; - - for (plane = 0; plane < num_planes; ++plane) { - const int subsampling_x = xd->plane[plane].subsampling_x; - const int subsampling_y = xd->plane[plane].subsampling_y; - - if (!is_chroma_reference(mi_row, mi_col, bsize, subsampling_x, - subsampling_y)) - continue; - - const BLOCK_SIZE bsizec = - scale_chroma_bsize(bsize, subsampling_x, subsampling_y); - - // TODO(jingning): Clean this up. 
- const struct macroblockd_plane *const pd = &xd->plane[plane]; - const BLOCK_SIZE plane_bsize = - get_plane_block_size(bsizec, pd->subsampling_x, pd->subsampling_y); - const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0]; - const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0]; - const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, plane); - - const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size]; - const int bw = block_size_wide[txb_size] >> tx_size_wide_log2[0]; - const int bh = block_size_high[txb_size] >> tx_size_high_log2[0]; - int idx, idy; - int block = 0; - int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size]; - av1_get_entropy_contexts(bsizec, pd, ctx.ta[plane], ctx.tl[plane]); - - av1_subtract_plane(x, bsizec, plane); - - arg.ta = ctx.ta[plane]; - arg.tl = ctx.tl[plane]; - - const BLOCK_SIZE max_unit_bsize = - get_plane_block_size(BLOCK_64X64, pd->subsampling_x, pd->subsampling_y); - int mu_blocks_wide = - block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0]; - int mu_blocks_high = - block_size_high[max_unit_bsize] >> tx_size_high_log2[0]; - - mu_blocks_wide = AOMMIN(mi_width, mu_blocks_wide); - mu_blocks_high = AOMMIN(mi_height, mu_blocks_high); - - for (idy = 0; idy < mi_height; idy += mu_blocks_high) { - for (idx = 0; idx < mi_width; idx += mu_blocks_wide) { - int blk_row, blk_col; - const int unit_height = AOMMIN(mu_blocks_high + idy, mi_height); - const int unit_width = AOMMIN(mu_blocks_wide + idx, mi_width); - for (blk_row = idy; blk_row < unit_height; blk_row += bh) { - for (blk_col = idx; blk_col < unit_width; blk_col += bw) { - encode_block_inter(plane, block, blk_row, blk_col, plane_bsize, - max_tx_size, &arg, mi_row, mi_col, dry_run); - block += step; - } - } - } - } - } -} - -static void encode_block_intra_and_set_context(int plane, int block, - int blk_row, int blk_col, - BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, void *arg) { - av1_encode_block_intra(plane, 
block, blk_row, blk_col, plane_bsize, tx_size, - arg); - - struct encode_b_args *const args = arg; - MACROBLOCK *x = args->x; - ENTROPY_CONTEXT *a = &args->ta[blk_col]; - ENTROPY_CONTEXT *l = &args->tl[blk_row]; - av1_set_txb_context(x, plane, block, tx_size, a, l); -} - -void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size, - void *arg) { - struct encode_b_args *const args = arg; - const AV1_COMMON *const cm = &args->cpi->common; - MACROBLOCK *const x = args->x; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *mbmi = xd->mi[0]; - struct macroblock_plane *const p = &x->plane[plane]; - struct macroblockd_plane *const pd = &xd->plane[plane]; - tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); - PLANE_TYPE plane_type = get_plane_type(plane); - const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col, - tx_size, cm->reduced_tx_set_used); - uint16_t *eob = &p->eobs[block]; - const int dst_stride = pd->dst.stride; - uint8_t *dst = - &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]]; - int dummy_rate_cost = 0; - - av1_predict_intra_block_facade(cm, xd, plane, blk_col, blk_row, tx_size); - - const int bw = block_size_wide[plane_bsize] >> tx_size_wide_log2[0]; - if (plane == 0 && is_blk_skip(x, plane, blk_row * bw + blk_col)) { - *eob = 0; - p->txb_entropy_ctx[block] = 0; - } else { - av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size); - - const ENTROPY_CONTEXT *a = &args->ta[blk_col]; - const ENTROPY_CONTEXT *l = &args->tl[blk_row]; - if (args->enable_optimize_b) { - av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, - tx_size, tx_type, AV1_XFORM_QUANT_FP); - TXB_CTX txb_ctx; - get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx); - av1_optimize_b(args->cpi, x, plane, block, tx_size, tx_type, &txb_ctx, 1, - &dummy_rate_cost); - } else { - av1_xform_quant( - cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, tx_type, 
- USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP); - } - } - - if (*eob) { - av1_inverse_transform_block(xd, dqcoeff, plane, tx_type, tx_size, dst, - dst_stride, *eob, cm->reduced_tx_set_used); - } - - if (*eob == 0 && plane == 0) { - // TODO(jingning): Temporarily disable txk_type check for eob=0 case. - // It is possible that certain collision in hash index would cause - // the assertion failure. To further optimize the rate-distortion - // performance, we need to re-visit this part and enable this assert - // again. -#if 0 - if (args->cpi->oxcf.aq_mode == NO_AQ - && args->cpi->oxcf.deltaq_mode == NO_DELTA_Q) { - assert(mbmi->txk_type[av1_get_txk_type_index(plane_bsize, blk_row, - blk_col)] == DCT_DCT); - } -#endif - update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size, - DCT_DCT); - } - - // For intra mode, skipped blocks are so rare that transmitting skip=1 is - // very expensive. - *(args->skip) = 0; - - if (plane == AOM_PLANE_Y && xd->cfl.store_y) { - cfl_store_tx(xd, blk_row, blk_col, tx_size, plane_bsize); - } -} - -void av1_encode_intra_block_plane(const struct AV1_COMP *cpi, MACROBLOCK *x, - BLOCK_SIZE bsize, int plane, - int enable_optimize_b, int mi_row, - int mi_col) { - const MACROBLOCKD *const xd = &x->e_mbd; - ENTROPY_CONTEXT ta[MAX_MIB_SIZE] = { 0 }; - ENTROPY_CONTEXT tl[MAX_MIB_SIZE] = { 0 }; - - struct encode_b_args arg = { - cpi, x, NULL, &(xd->mi[0]->skip), ta, tl, enable_optimize_b - }; - - if (!is_chroma_reference(mi_row, mi_col, bsize, - xd->plane[plane].subsampling_x, - xd->plane[plane].subsampling_y)) - return; - - if (enable_optimize_b) { - const struct macroblockd_plane *const pd = &xd->plane[plane]; - av1_get_entropy_contexts(bsize, pd, ta, tl); - } - av1_foreach_transformed_block_in_plane( - xd, bsize, plane, encode_block_intra_and_set_context, &arg); -} diff --git a/third_party/aom/av1/encoder/encodemb.h b/third_party/aom/av1/encoder/encodemb.h deleted file mode 100644 index 39080de59..000000000 --- 
a/third_party/aom/av1/encoder/encodemb.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_ENCODER_ENCODEMB_H_ -#define AOM_AV1_ENCODER_ENCODEMB_H_ - -#include "config/aom_config.h" - -#include "av1/common/onyxc_int.h" -#include "av1/common/txb_common.h" -#include "av1/encoder/block.h" -#include "av1/encoder/tokenize.h" -#ifdef __cplusplus -extern "C" { -#endif - -struct optimize_ctx { - ENTROPY_CONTEXT ta[MAX_MB_PLANE][MAX_MIB_SIZE]; - ENTROPY_CONTEXT tl[MAX_MB_PLANE][MAX_MIB_SIZE]; -}; - -struct encode_b_args { - const struct AV1_COMP *cpi; - MACROBLOCK *x; - struct optimize_ctx *ctx; - int8_t *skip; - ENTROPY_CONTEXT *ta; - ENTROPY_CONTEXT *tl; - int8_t enable_optimize_b; -}; - -typedef enum AV1_XFORM_QUANT { - AV1_XFORM_QUANT_FP = 0, - AV1_XFORM_QUANT_B = 1, - AV1_XFORM_QUANT_DC = 2, - AV1_XFORM_QUANT_SKIP_QUANT, - AV1_XFORM_QUANT_TYPES, -} AV1_XFORM_QUANT; - -void av1_encode_sb(const struct AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, - int mi_row, int mi_col, RUN_TYPE dry_run); - -void av1_foreach_transformed_block_in_plane( - const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane, - foreach_transformed_block_visitor visit, void *arg); - -void av1_foreach_transformed_block(const MACROBLOCKD *const xd, - BLOCK_SIZE bsize, int mi_row, int mi_col, - foreach_transformed_block_visitor visit, - void *arg, const int num_planes); - -void av1_encode_sby_pass1(AV1_COMMON *cm, MACROBLOCK *x, BLOCK_SIZE bsize); - -void 
av1_xform_quant(const AV1_COMMON *cm, MACROBLOCK *x, int plane, int block, - int blk_row, int blk_col, BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, TX_TYPE tx_type, - AV1_XFORM_QUANT xform_quant_idx); - -int av1_optimize_b(const struct AV1_COMP *cpi, MACROBLOCK *mb, int plane, - int block, TX_SIZE tx_size, TX_TYPE tx_type, - const TXB_CTX *const txb_ctx, int fast_mode, int *rate_cost); - -void av1_subtract_txb(MACROBLOCK *x, int plane, BLOCK_SIZE plane_bsize, - int blk_col, int blk_row, TX_SIZE tx_size); - -void av1_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane); - -static INLINE void av1_set_txb_context(MACROBLOCK *x, int plane, int block, - TX_SIZE tx_size, ENTROPY_CONTEXT *a, - ENTROPY_CONTEXT *l) { - const uint8_t ctx = x->plane[plane].txb_entropy_ctx[block]; - memset(a, ctx, tx_size_wide_unit[tx_size] * sizeof(*a)); - memset(l, ctx, tx_size_high_unit[tx_size] * sizeof(*l)); -} - -void av1_encode_block_intra(int plane, int block, int blk_row, int blk_col, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg); - -void av1_encode_intra_block_plane(const struct AV1_COMP *cpi, MACROBLOCK *x, - BLOCK_SIZE bsize, int plane, - int enable_optimize_b, int mi_row, - int mi_col); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_ENCODEMB_H_ diff --git a/third_party/aom/av1/encoder/encodemv.c b/third_party/aom/av1/encoder/encodemv.c deleted file mode 100644 index 42eb5abf6..000000000 --- a/third_party/aom/av1/encoder/encodemv.c +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include - -#include "av1/common/common.h" -#include "av1/common/entropymode.h" - -#include "av1/encoder/cost.h" -#include "av1/encoder/encodemv.h" - -#include "aom_dsp/aom_dsp_common.h" -#include "aom_ports/bitops.h" - -static INLINE int mv_class_base(MV_CLASS_TYPE c) { - return c ? CLASS0_SIZE << (c + 2) : 0; -} - -// If n != 0, returns the floor of log base 2 of n. If n == 0, returns 0. -static INLINE uint8_t log_in_base_2(unsigned int n) { - // get_msb() is only valid when n != 0. - return n == 0 ? 0 : get_msb(n); -} - -static INLINE MV_CLASS_TYPE get_mv_class(int z, int *offset) { - const MV_CLASS_TYPE c = (z >= CLASS0_SIZE * 4096) - ? MV_CLASS_10 - : (MV_CLASS_TYPE)log_in_base_2(z >> 3); - if (offset) *offset = z - mv_class_base(c); - return c; -} - -static void encode_mv_component(aom_writer *w, int comp, nmv_component *mvcomp, - MvSubpelPrecision precision) { - assert(comp != 0); - int offset; - const int sign = comp < 0; - const int mag = sign ? -comp : comp; - const int mv_class = get_mv_class(mag - 1, &offset); - const int d = offset >> 3; // int mv data - const int fr = (offset >> 1) & 3; // fractional mv data - const int hp = offset & 1; // high precision mv data - - // Sign - aom_write_symbol(w, sign, mvcomp->sign_cdf, 2); - - // Class - aom_write_symbol(w, mv_class, mvcomp->classes_cdf, MV_CLASSES); - - // Integer bits - if (mv_class == MV_CLASS_0) { - aom_write_symbol(w, d, mvcomp->class0_cdf, CLASS0_SIZE); - } else { - int i; - const int n = mv_class + CLASS0_BITS - 1; // number of bits - for (i = 0; i < n; ++i) - aom_write_symbol(w, (d >> i) & 1, mvcomp->bits_cdf[i], 2); - } - // Fractional bits - if (precision > MV_SUBPEL_NONE) { - aom_write_symbol( - w, fr, - mv_class == MV_CLASS_0 ? 
mvcomp->class0_fp_cdf[d] : mvcomp->fp_cdf, - MV_FP_SIZE); - } - - // High precision bit - if (precision > MV_SUBPEL_LOW_PRECISION) - aom_write_symbol( - w, hp, mv_class == MV_CLASS_0 ? mvcomp->class0_hp_cdf : mvcomp->hp_cdf, - 2); -} - -static void build_nmv_component_cost_table(int *mvcost, - const nmv_component *const mvcomp, - MvSubpelPrecision precision) { - int i, v; - int sign_cost[2], class_cost[MV_CLASSES], class0_cost[CLASS0_SIZE]; - int bits_cost[MV_OFFSET_BITS][2]; - int class0_fp_cost[CLASS0_SIZE][MV_FP_SIZE], fp_cost[MV_FP_SIZE]; - int class0_hp_cost[2], hp_cost[2]; - - av1_cost_tokens_from_cdf(sign_cost, mvcomp->sign_cdf, NULL); - av1_cost_tokens_from_cdf(class_cost, mvcomp->classes_cdf, NULL); - av1_cost_tokens_from_cdf(class0_cost, mvcomp->class0_cdf, NULL); - for (i = 0; i < MV_OFFSET_BITS; ++i) { - av1_cost_tokens_from_cdf(bits_cost[i], mvcomp->bits_cdf[i], NULL); - } - - for (i = 0; i < CLASS0_SIZE; ++i) - av1_cost_tokens_from_cdf(class0_fp_cost[i], mvcomp->class0_fp_cdf[i], NULL); - av1_cost_tokens_from_cdf(fp_cost, mvcomp->fp_cdf, NULL); - - if (precision > MV_SUBPEL_LOW_PRECISION) { - av1_cost_tokens_from_cdf(class0_hp_cost, mvcomp->class0_hp_cdf, NULL); - av1_cost_tokens_from_cdf(hp_cost, mvcomp->hp_cdf, NULL); - } - mvcost[0] = 0; - for (v = 1; v <= MV_MAX; ++v) { - int z, c, o, d, e, f, cost = 0; - z = v - 1; - c = get_mv_class(z, &o); - cost += class_cost[c]; - d = (o >> 3); /* int mv data */ - f = (o >> 1) & 3; /* fractional pel mv data */ - e = (o & 1); /* high precision mv data */ - if (c == MV_CLASS_0) { - cost += class0_cost[d]; - } else { - const int b = c + CLASS0_BITS - 1; /* number of bits */ - for (i = 0; i < b; ++i) cost += bits_cost[i][((d >> i) & 1)]; - } - if (precision > MV_SUBPEL_NONE) { - if (c == MV_CLASS_0) { - cost += class0_fp_cost[d][f]; - } else { - cost += fp_cost[f]; - } - if (precision > MV_SUBPEL_LOW_PRECISION) { - if (c == MV_CLASS_0) { - cost += class0_hp_cost[e]; - } else { - cost += hp_cost[e]; - } - } - } - 
mvcost[v] = cost + sign_cost[0]; - mvcost[-v] = cost + sign_cost[1]; - } -} - -void av1_encode_mv(AV1_COMP *cpi, aom_writer *w, const MV *mv, const MV *ref, - nmv_context *mvctx, int usehp) { - const MV diff = { mv->row - ref->row, mv->col - ref->col }; - const MV_JOINT_TYPE j = av1_get_mv_joint(&diff); - if (cpi->common.cur_frame_force_integer_mv) { - usehp = MV_SUBPEL_NONE; - } - aom_write_symbol(w, j, mvctx->joints_cdf, MV_JOINTS); - if (mv_joint_vertical(j)) - encode_mv_component(w, diff.row, &mvctx->comps[0], usehp); - - if (mv_joint_horizontal(j)) - encode_mv_component(w, diff.col, &mvctx->comps[1], usehp); - - // If auto_mv_step_size is enabled then keep track of the largest - // motion vector component used. - if (cpi->sf.mv.auto_mv_step_size) { - unsigned int maxv = AOMMAX(abs(mv->row), abs(mv->col)) >> 3; - cpi->max_mv_magnitude = AOMMAX(maxv, cpi->max_mv_magnitude); - } -} - -void av1_encode_dv(aom_writer *w, const MV *mv, const MV *ref, - nmv_context *mvctx) { - // DV and ref DV should not have sub-pel. 
- assert((mv->col & 7) == 0); - assert((mv->row & 7) == 0); - assert((ref->col & 7) == 0); - assert((ref->row & 7) == 0); - const MV diff = { mv->row - ref->row, mv->col - ref->col }; - const MV_JOINT_TYPE j = av1_get_mv_joint(&diff); - - aom_write_symbol(w, j, mvctx->joints_cdf, MV_JOINTS); - if (mv_joint_vertical(j)) - encode_mv_component(w, diff.row, &mvctx->comps[0], MV_SUBPEL_NONE); - - if (mv_joint_horizontal(j)) - encode_mv_component(w, diff.col, &mvctx->comps[1], MV_SUBPEL_NONE); -} - -void av1_build_nmv_cost_table(int *mvjoint, int *mvcost[2], - const nmv_context *ctx, - MvSubpelPrecision precision) { - av1_cost_tokens_from_cdf(mvjoint, ctx->joints_cdf, NULL); - build_nmv_component_cost_table(mvcost[0], &ctx->comps[0], precision); - build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], precision); -} - -int_mv av1_get_ref_mv_from_stack(int ref_idx, - const MV_REFERENCE_FRAME *ref_frame, - int ref_mv_idx, - const MB_MODE_INFO_EXT *mbmi_ext) { - const int8_t ref_frame_type = av1_ref_frame_type(ref_frame); - const CANDIDATE_MV *curr_ref_mv_stack = - mbmi_ext->ref_mv_stack[ref_frame_type]; - int_mv ref_mv; - ref_mv.as_int = INVALID_MV; - - if (ref_frame[1] > INTRA_FRAME) { - if (ref_idx == 0) { - ref_mv = curr_ref_mv_stack[ref_mv_idx].this_mv; - } else { - assert(ref_idx == 1); - ref_mv = curr_ref_mv_stack[ref_mv_idx].comp_mv; - } - } else { - assert(ref_idx == 0); - if (ref_mv_idx < mbmi_ext->ref_mv_count[ref_frame_type]) { - ref_mv = curr_ref_mv_stack[ref_mv_idx].this_mv; - } else { - ref_mv = mbmi_ext->global_mvs[ref_frame_type]; - } - } - return ref_mv; -} - -int_mv av1_get_ref_mv(const MACROBLOCK *x, int ref_idx) { - const MACROBLOCKD *xd = &x->e_mbd; - const MB_MODE_INFO *mbmi = xd->mi[0]; - int ref_mv_idx = mbmi->ref_mv_idx; - if (mbmi->mode == NEAR_NEWMV || mbmi->mode == NEW_NEARMV) { - assert(has_second_ref(mbmi)); - ref_mv_idx += 1; - } - return av1_get_ref_mv_from_stack(ref_idx, mbmi->ref_frame, ref_mv_idx, - x->mbmi_ext); -} - -void 
av1_find_best_ref_mvs_from_stack(int allow_hp, - const MB_MODE_INFO_EXT *mbmi_ext, - MV_REFERENCE_FRAME ref_frame, - int_mv *nearest_mv, int_mv *near_mv, - int is_integer) { - const int ref_idx = 0; - MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME }; - *nearest_mv = av1_get_ref_mv_from_stack(ref_idx, ref_frames, 0, mbmi_ext); - lower_mv_precision(&nearest_mv->as_mv, allow_hp, is_integer); - *near_mv = av1_get_ref_mv_from_stack(ref_idx, ref_frames, 1, mbmi_ext); - lower_mv_precision(&near_mv->as_mv, allow_hp, is_integer); -} diff --git a/third_party/aom/av1/encoder/encodemv.h b/third_party/aom/av1/encoder/encodemv.h deleted file mode 100644 index 37ff547c8..000000000 --- a/third_party/aom/av1/encoder/encodemv.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_ENCODER_ENCODEMV_H_ -#define AOM_AV1_ENCODER_ENCODEMV_H_ - -#include "av1/encoder/encoder.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void av1_encode_mv(AV1_COMP *cpi, aom_writer *w, const MV *mv, const MV *ref, - nmv_context *mvctx, int usehp); - -void av1_build_nmv_cost_table(int *mvjoint, int *mvcost[2], - const nmv_context *mvctx, - MvSubpelPrecision precision); - -void av1_update_mv_count(ThreadData *td); - -void av1_encode_dv(aom_writer *w, const MV *mv, const MV *ref, - nmv_context *mvctx); -int_mv av1_get_ref_mv(const MACROBLOCK *x, int ref_idx); -int_mv av1_get_ref_mv_from_stack(int ref_idx, - const MV_REFERENCE_FRAME *ref_frame, - int ref_mv_idx, - const MB_MODE_INFO_EXT *mbmi_ext); -void av1_find_best_ref_mvs_from_stack(int allow_hp, - const MB_MODE_INFO_EXT *mbmi_ext, - MV_REFERENCE_FRAME ref_frame, - int_mv *nearest_mv, int_mv *near_mv, - int is_integer); - -static INLINE MV_JOINT_TYPE av1_get_mv_joint(const MV *mv) { - if (mv->row == 0) { - return mv->col == 0 ? MV_JOINT_ZERO : MV_JOINT_HNZVZ; - } else { - return mv->col == 0 ? MV_JOINT_HZVNZ : MV_JOINT_HNZVNZ; - } -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_ENCODEMV_H_ diff --git a/third_party/aom/av1/encoder/encoder.c b/third_party/aom/av1/encoder/encoder.c deleted file mode 100644 index a2da2df89..000000000 --- a/third_party/aom/av1/encoder/encoder.c +++ /dev/null @@ -1,6437 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include -#include -#include - -#include "config/aom_config.h" -#include "config/aom_dsp_rtcd.h" -#include "config/aom_scale_rtcd.h" -#include "config/av1_rtcd.h" - -#include "aom_dsp/aom_dsp_common.h" -#include "aom_dsp/aom_filter.h" -#if CONFIG_DENOISE -#include "aom_dsp/grain_table.h" -#include "aom_dsp/noise_util.h" -#include "aom_dsp/noise_model.h" -#endif -#include "aom_dsp/psnr.h" -#if CONFIG_INTERNAL_STATS -#include "aom_dsp/ssim.h" -#endif -#include "aom_ports/aom_timer.h" -#include "aom_ports/mem.h" -#include "aom_ports/system_state.h" -#include "aom_scale/aom_scale.h" -#if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG -#include "aom_util/debug_util.h" -#endif // CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG - -#include "av1/common/alloccommon.h" -#include "av1/common/cdef.h" -#include "av1/common/filter.h" -#include "av1/common/idct.h" -#include "av1/common/reconinter.h" -#include "av1/common/reconintra.h" -#include "av1/common/resize.h" -#include "av1/common/tile_common.h" - -#include "av1/encoder/aq_complexity.h" -#include "av1/encoder/aq_cyclicrefresh.h" -#include "av1/encoder/aq_variance.h" -#include "av1/encoder/bitstream.h" -#include "av1/encoder/context_tree.h" -#include "av1/encoder/encodeframe.h" -#include "av1/encoder/encodemv.h" -#include "av1/encoder/encoder.h" -#include "av1/encoder/encodetxb.h" -#include "av1/encoder/ethread.h" -#include "av1/encoder/firstpass.h" -#include "av1/encoder/grain_test_vectors.h" -#include "av1/encoder/hash_motion.h" -#include "av1/encoder/mbgraph.h" -#include "av1/encoder/picklpf.h" -#include "av1/encoder/pickrst.h" -#include "av1/encoder/random.h" -#include "av1/encoder/ratectrl.h" -#include "av1/encoder/rd.h" -#include "av1/encoder/segmentation.h" -#include "av1/encoder/speed_features.h" -#include "av1/encoder/temporal_filter.h" - -#define DEFAULT_EXPLICIT_ORDER_HINT_BITS 7 - -// av1 uses 10,000,000 ticks/second as time stamp -#define TICKS_PER_SEC 10000000LL - -#if CONFIG_ENTROPY_STATS 
-FRAME_COUNTS aggregate_fc; -#endif // CONFIG_ENTROPY_STATS - -#define AM_SEGMENT_ID_INACTIVE 7 -#define AM_SEGMENT_ID_ACTIVE 0 - -// Whether to use high precision mv for altref computation. -#define ALTREF_HIGH_PRECISION_MV 1 - -// Q threshold for high precision mv. Choose a very high value for now so that -// HIGH_PRECISION is always chosen. -#define HIGH_PRECISION_MV_QTHRESH 200 - -// #define OUTPUT_YUV_REC -#ifdef OUTPUT_YUV_SKINMAP -FILE *yuv_skinmap_file = NULL; -#endif -#ifdef OUTPUT_YUV_REC -FILE *yuv_rec_file; -#define FILE_NAME_LEN 100 -#endif - -static INLINE void Scale2Ratio(AOM_SCALING mode, int *hr, int *hs) { - switch (mode) { - case NORMAL: - *hr = 1; - *hs = 1; - break; - case FOURFIVE: - *hr = 4; - *hs = 5; - break; - case THREEFIVE: - *hr = 3; - *hs = 5; - break; - case ONETWO: - *hr = 1; - *hs = 2; - break; - default: - *hr = 1; - *hs = 1; - assert(0); - break; - } -} - -// Mark all inactive blocks as active. Other segmentation features may be set -// so memset cannot be used, instead only inactive blocks should be reset. 
-static void suppress_active_map(AV1_COMP *cpi) { - unsigned char *const seg_map = cpi->segmentation_map; - int i; - if (cpi->active_map.enabled || cpi->active_map.update) - for (i = 0; i < cpi->common.mi_rows * cpi->common.mi_cols; ++i) - if (seg_map[i] == AM_SEGMENT_ID_INACTIVE) - seg_map[i] = AM_SEGMENT_ID_ACTIVE; -} - -static void apply_active_map(AV1_COMP *cpi) { - struct segmentation *const seg = &cpi->common.seg; - unsigned char *const seg_map = cpi->segmentation_map; - const unsigned char *const active_map = cpi->active_map.map; - int i; - - assert(AM_SEGMENT_ID_ACTIVE == CR_SEGMENT_ID_BASE); - - if (frame_is_intra_only(&cpi->common)) { - cpi->active_map.enabled = 0; - cpi->active_map.update = 1; - } - - if (cpi->active_map.update) { - if (cpi->active_map.enabled) { - for (i = 0; i < cpi->common.mi_rows * cpi->common.mi_cols; ++i) - if (seg_map[i] == AM_SEGMENT_ID_ACTIVE) seg_map[i] = active_map[i]; - av1_enable_segmentation(seg); - av1_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP); - av1_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_Y_H); - av1_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_Y_V); - av1_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_U); - av1_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_V); - - av1_set_segdata(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_Y_H, - -MAX_LOOP_FILTER); - av1_set_segdata(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_Y_V, - -MAX_LOOP_FILTER); - av1_set_segdata(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_U, - -MAX_LOOP_FILTER); - av1_set_segdata(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_V, - -MAX_LOOP_FILTER); - } else { - av1_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP); - av1_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_Y_H); - av1_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_Y_V); - av1_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_U); - av1_disable_segfeature(seg, 
AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF_V); - if (seg->enabled) { - seg->update_data = 1; - seg->update_map = 1; - } - } - cpi->active_map.update = 0; - } -} - -int av1_set_active_map(AV1_COMP *cpi, unsigned char *new_map_16x16, int rows, - int cols) { - if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols) { - unsigned char *const active_map_8x8 = cpi->active_map.map; - const int mi_rows = cpi->common.mi_rows; - const int mi_cols = cpi->common.mi_cols; - const int row_scale = mi_size_high[BLOCK_16X16] == 2 ? 1 : 2; - const int col_scale = mi_size_wide[BLOCK_16X16] == 2 ? 1 : 2; - cpi->active_map.update = 1; - if (new_map_16x16) { - int r, c; - for (r = 0; r < mi_rows; ++r) { - for (c = 0; c < mi_cols; ++c) { - active_map_8x8[r * mi_cols + c] = - new_map_16x16[(r >> row_scale) * cols + (c >> col_scale)] - ? AM_SEGMENT_ID_ACTIVE - : AM_SEGMENT_ID_INACTIVE; - } - } - cpi->active_map.enabled = 1; - } else { - cpi->active_map.enabled = 0; - } - return 0; - } else { - return -1; - } -} - -int av1_get_active_map(AV1_COMP *cpi, unsigned char *new_map_16x16, int rows, - int cols) { - if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols && - new_map_16x16) { - unsigned char *const seg_map_8x8 = cpi->segmentation_map; - const int mi_rows = cpi->common.mi_rows; - const int mi_cols = cpi->common.mi_cols; - const int row_scale = mi_size_high[BLOCK_16X16] == 2 ? 1 : 2; - const int col_scale = mi_size_wide[BLOCK_16X16] == 2 ? 
1 : 2; - - memset(new_map_16x16, !cpi->active_map.enabled, rows * cols); - if (cpi->active_map.enabled) { - int r, c; - for (r = 0; r < mi_rows; ++r) { - for (c = 0; c < mi_cols; ++c) { - // Cyclic refresh segments are considered active despite not having - // AM_SEGMENT_ID_ACTIVE - new_map_16x16[(r >> row_scale) * cols + (c >> col_scale)] |= - seg_map_8x8[r * mi_cols + c] != AM_SEGMENT_ID_INACTIVE; - } - } - } - return 0; - } else { - return -1; - } -} - -static void set_high_precision_mv(AV1_COMP *cpi, int allow_high_precision_mv, - int cur_frame_force_integer_mv) { - MACROBLOCK *const mb = &cpi->td.mb; - cpi->common.allow_high_precision_mv = - allow_high_precision_mv && cur_frame_force_integer_mv == 0; - const int copy_hp = - cpi->common.allow_high_precision_mv && cur_frame_force_integer_mv == 0; - int *(*src)[2] = copy_hp ? &mb->nmvcost_hp : &mb->nmvcost; - mb->mv_cost_stack = *src; -} - -static BLOCK_SIZE select_sb_size(const AV1_COMP *const cpi) { - const AV1_COMMON *const cm = &cpi->common; - - if (cpi->oxcf.superblock_size == AOM_SUPERBLOCK_SIZE_64X64) - return BLOCK_64X64; -#if CONFIG_FILEOPTIONS - if (cm->options && cm->options->ext_partition) -#endif - if (cpi->oxcf.superblock_size == AOM_SUPERBLOCK_SIZE_128X128) - return BLOCK_128X128; - - assert(cpi->oxcf.superblock_size == AOM_SUPERBLOCK_SIZE_DYNAMIC); - -// TODO(any): Possibly could improve this with a heuristic. -#if CONFIG_FILEOPTIONS - if (cm->options && !cm->options->ext_partition) return BLOCK_64X64; -#endif - - // When superres / resize is on, 'cm->width / height' can change between - // calls, so we don't apply this heuristic there. Also, this heuristic gives - // compression gain for speed >= 2 only. - if (cpi->oxcf.superres_mode == SUPERRES_NONE && - cpi->oxcf.resize_mode == RESIZE_NONE && cpi->oxcf.speed >= 2) { - return (cm->width >= 480 && cm->height >= 360) ? 
BLOCK_128X128 - : BLOCK_64X64; - } - - return BLOCK_128X128; -} - -static void setup_frame(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - // Set up entropy context depending on frame type. The decoder mandates - // the use of the default context, index 0, for keyframes and inter - // frames where the error_resilient_mode or intra_only flag is set. For - // other inter-frames the encoder currently uses only two contexts; - // context 1 for ALTREF frames and context 0 for the others. - - cm->primary_ref_frame = PRIMARY_REF_NONE; - if (frame_is_intra_only(cm) || cm->error_resilient_mode || - cm->force_primary_ref_none) { - av1_setup_past_independence(cm); - for (int i = 0; i < REF_FRAMES; i++) { - cm->fb_of_context_type[i] = -1; - } - cm->fb_of_context_type[REGULAR_FRAME] = - cm->show_frame ? get_ref_frame_map_idx(cpi, GOLDEN_FRAME) - : get_ref_frame_map_idx(cpi, ALTREF_FRAME); - cm->frame_context_idx = REGULAR_FRAME; - } else { - const GF_GROUP *gf_group = &cpi->twopass.gf_group; - if (gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE) - cm->frame_context_idx = EXT_ARF_FRAME; - else if (cpi->refresh_alt_ref_frame) - cm->frame_context_idx = ARF_FRAME; - else if (cpi->rc.is_src_frame_alt_ref) - cm->frame_context_idx = OVERLAY_FRAME; - else if (cpi->refresh_golden_frame) - cm->frame_context_idx = GLD_FRAME; - else if (cpi->refresh_bwd_ref_frame) - cm->frame_context_idx = BRF_FRAME; - else - cm->frame_context_idx = REGULAR_FRAME; - int wanted_fb = cm->fb_of_context_type[cm->frame_context_idx]; - for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { - int fb = get_ref_frame_map_idx(cpi, ref_frame); - if (fb == wanted_fb) { - cm->primary_ref_frame = ref_frame - LAST_FRAME; - } - } - } - - if (cm->frame_type == KEY_FRAME && cm->show_frame) { - cpi->refresh_golden_frame = 1; - cpi->refresh_alt_ref_frame = 1; - av1_zero(cpi->interp_filter_selected); - set_sb_size(&cm->seq_params, select_sb_size(cpi)); - set_use_reference_buffer(cm, 0); 
- } else if (frame_is_sframe(cm)) { - cpi->refresh_golden_frame = 1; - cpi->refresh_alt_ref_frame = 1; - av1_zero(cpi->interp_filter_selected); - set_sb_size(&cm->seq_params, select_sb_size(cpi)); - } else { - if (cm->primary_ref_frame == PRIMARY_REF_NONE || - cm->frame_refs[cm->primary_ref_frame].idx < 0) { - av1_setup_past_independence(cm); - cm->seg.update_map = 1; - cm->seg.update_data = 1; - } else { - *cm->fc = cm->frame_contexts[cm->frame_refs[cm->primary_ref_frame].idx]; - } - av1_zero(cpi->interp_filter_selected[0]); - } - - cm->prev_frame = get_prev_frame(cm); - cpi->vaq_refresh = 0; -} - -static void enc_setup_mi(AV1_COMMON *cm) { - int i; - int mi_rows_sb_aligned = calc_mi_size(cm->mi_rows); - cm->mi = cm->mip; - memset(cm->mip, 0, cm->mi_stride * mi_rows_sb_aligned * sizeof(*cm->mip)); - cm->prev_mi = cm->prev_mip; - // Clear top border row - memset(cm->prev_mip, 0, sizeof(*cm->prev_mip) * cm->mi_stride); - // Clear left border column - for (i = 0; i < mi_rows_sb_aligned; ++i) - memset(&cm->prev_mip[i * cm->mi_stride], 0, sizeof(*cm->prev_mip)); - cm->mi_grid_visible = cm->mi_grid_base; - cm->prev_mi_grid_visible = cm->prev_mi_grid_base; - - memset(cm->mi_grid_base, 0, - cm->mi_stride * mi_rows_sb_aligned * sizeof(*cm->mi_grid_base)); -} - -static int enc_alloc_mi(AV1_COMMON *cm, int mi_size) { - cm->mip = aom_calloc(mi_size, sizeof(*cm->mip)); - if (!cm->mip) return 1; - cm->prev_mip = aom_calloc(mi_size, sizeof(*cm->prev_mip)); - if (!cm->prev_mip) return 1; - cm->mi_alloc_size = mi_size; - - cm->mi_grid_base = - (MB_MODE_INFO **)aom_calloc(mi_size, sizeof(MB_MODE_INFO *)); - if (!cm->mi_grid_base) return 1; - cm->prev_mi_grid_base = - (MB_MODE_INFO **)aom_calloc(mi_size, sizeof(MB_MODE_INFO *)); - if (!cm->prev_mi_grid_base) return 1; - - return 0; -} - -static void enc_free_mi(AV1_COMMON *cm) { - aom_free(cm->mip); - cm->mip = NULL; - aom_free(cm->prev_mip); - cm->prev_mip = NULL; - aom_free(cm->mi_grid_base); - cm->mi_grid_base = NULL; - 
aom_free(cm->prev_mi_grid_base); - cm->prev_mi_grid_base = NULL; - cm->mi_alloc_size = 0; -} - -static void swap_mi_and_prev_mi(AV1_COMMON *cm) { - // Current mip will be the prev_mip for the next frame. - MB_MODE_INFO **temp_base = cm->prev_mi_grid_base; - MB_MODE_INFO *temp = cm->prev_mip; - cm->prev_mip = cm->mip; - cm->mip = temp; - - // Update the upper left visible macroblock ptrs. - cm->mi = cm->mip; - cm->prev_mi = cm->prev_mip; - - cm->prev_mi_grid_base = cm->mi_grid_base; - cm->mi_grid_base = temp_base; - cm->mi_grid_visible = cm->mi_grid_base; - cm->prev_mi_grid_visible = cm->prev_mi_grid_base; -} - -void av1_initialize_enc(void) { - av1_rtcd(); - aom_dsp_rtcd(); - aom_scale_rtcd(); - av1_init_intra_predictors(); - av1_init_me_luts(); - av1_rc_init_minq_luts(); - av1_init_wedge_masks(); -} - -static void dealloc_context_buffers_ext(AV1_COMP *cpi) { - if (cpi->mbmi_ext_base) { - aom_free(cpi->mbmi_ext_base); - cpi->mbmi_ext_base = NULL; - } -} - -static void alloc_context_buffers_ext(AV1_COMP *cpi) { - AV1_COMMON *cm = &cpi->common; - int mi_size = cm->mi_cols * cm->mi_rows; - - dealloc_context_buffers_ext(cpi); - CHECK_MEM_ERROR(cm, cpi->mbmi_ext_base, - aom_calloc(mi_size, sizeof(*cpi->mbmi_ext_base))); -} - -static void update_film_grain_parameters(struct AV1_COMP *cpi, - const AV1EncoderConfig *oxcf) { - AV1_COMMON *const cm = &cpi->common; - cpi->oxcf = *oxcf; - - if (cpi->film_grain_table) { - aom_film_grain_table_free(cpi->film_grain_table); - aom_free(cpi->film_grain_table); - cpi->film_grain_table = NULL; - } - - if (oxcf->film_grain_test_vector) { - cm->seq_params.film_grain_params_present = 1; - if (cm->frame_type == KEY_FRAME) { - memcpy(&cm->film_grain_params, - film_grain_test_vectors + oxcf->film_grain_test_vector - 1, - sizeof(cm->film_grain_params)); - - cm->film_grain_params.bit_depth = cm->seq_params.bit_depth; - if (cm->seq_params.color_range == AOM_CR_FULL_RANGE) { - cm->film_grain_params.clip_to_restricted_range = 0; - } - } - } else 
if (oxcf->film_grain_table_filename) { - cpi->film_grain_table = aom_malloc(sizeof(*cpi->film_grain_table)); - memset(cpi->film_grain_table, 0, sizeof(aom_film_grain_table_t)); - - aom_film_grain_table_read(cpi->film_grain_table, - oxcf->film_grain_table_filename, &cm->error); - } else { - cm->seq_params.film_grain_params_present = 0; - memset(&cm->film_grain_params, 0, sizeof(cm->film_grain_params)); - } -} - -static void dealloc_compressor_data(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - - dealloc_context_buffers_ext(cpi); - - aom_free(cpi->tile_data); - cpi->tile_data = NULL; - - // Delete sementation map - aom_free(cpi->segmentation_map); - cpi->segmentation_map = NULL; - - av1_cyclic_refresh_free(cpi->cyclic_refresh); - cpi->cyclic_refresh = NULL; - - aom_free(cpi->active_map.map); - cpi->active_map.map = NULL; - - aom_free(cpi->td.mb.above_pred_buf); - cpi->td.mb.above_pred_buf = NULL; - - aom_free(cpi->td.mb.left_pred_buf); - cpi->td.mb.left_pred_buf = NULL; - - aom_free(cpi->td.mb.wsrc_buf); - cpi->td.mb.wsrc_buf = NULL; - - for (int i = 0; i < 2; i++) - for (int j = 0; j < 2; j++) { - aom_free(cpi->td.mb.hash_value_buffer[i][j]); - cpi->td.mb.hash_value_buffer[i][j] = NULL; - } - aom_free(cpi->td.mb.mask_buf); - cpi->td.mb.mask_buf = NULL; - - aom_free(cm->tpl_mvs); - cm->tpl_mvs = NULL; - - av1_free_ref_frame_buffers(cm->buffer_pool); - av1_free_txb_buf(cpi); - av1_free_context_buffers(cm); - - aom_free_frame_buffer(&cpi->last_frame_uf); - av1_free_restoration_buffers(cm); - aom_free_frame_buffer(&cpi->trial_frame_rst); - aom_free_frame_buffer(&cpi->scaled_source); - aom_free_frame_buffer(&cpi->scaled_last_source); - aom_free_frame_buffer(&cpi->alt_ref_buffer); - av1_lookahead_destroy(cpi->lookahead); - - aom_free(cpi->tile_tok[0][0]); - cpi->tile_tok[0][0] = 0; - - aom_free(cpi->tplist[0][0]); - cpi->tplist[0][0] = NULL; - - av1_free_pc_tree(&cpi->td, num_planes); - - 
aom_free(cpi->td.mb.palette_buffer); - - aom_free(cpi->td.mb.tmp_conv_dst); - for (int j = 0; j < 2; ++j) { - aom_free(cpi->td.mb.tmp_obmc_bufs[j]); - } - -#if CONFIG_DENOISE - if (cpi->denoise_and_model) { - aom_denoise_and_model_free(cpi->denoise_and_model); - cpi->denoise_and_model = NULL; - } -#endif - if (cpi->film_grain_table) { - aom_film_grain_table_free(cpi->film_grain_table); - cpi->film_grain_table = NULL; - } -} - -static void save_coding_context(AV1_COMP *cpi) { - CODING_CONTEXT *const cc = &cpi->coding_context; - AV1_COMMON *cm = &cpi->common; - - // Stores a snapshot of key state variables which can subsequently be - // restored with a call to av1_restore_coding_context. These functions are - // intended for use in a re-code loop in av1_compress_frame where the - // quantizer value is adjusted between loop iterations. - av1_copy(cc->nmv_vec_cost, cpi->td.mb.nmv_vec_cost); - av1_copy(cc->nmv_costs, cpi->nmv_costs); - av1_copy(cc->nmv_costs_hp, cpi->nmv_costs_hp); - - cc->fc = *cm->fc; -} - -static void restore_coding_context(AV1_COMP *cpi) { - CODING_CONTEXT *const cc = &cpi->coding_context; - AV1_COMMON *cm = &cpi->common; - - // Restore key state variables to the snapshot state stored in the - // previous call to av1_save_coding_context. 
- av1_copy(cpi->td.mb.nmv_vec_cost, cc->nmv_vec_cost); - av1_copy(cpi->nmv_costs, cc->nmv_costs); - av1_copy(cpi->nmv_costs_hp, cc->nmv_costs_hp); - - *cm->fc = cc->fc; -} - -static void configure_static_seg_features(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - const RATE_CONTROL *const rc = &cpi->rc; - struct segmentation *const seg = &cm->seg; - - int high_q = (int)(rc->avg_q > 48.0); - int qi_delta; - - // Disable and clear down for KF - if (cm->frame_type == KEY_FRAME) { - // Clear down the global segmentation map - memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); - seg->update_map = 0; - seg->update_data = 0; - cpi->static_mb_pct = 0; - - // Disable segmentation - av1_disable_segmentation(seg); - - // Clear down the segment features. - av1_clearall_segfeatures(seg); - } else if (cpi->refresh_alt_ref_frame) { - // If this is an alt ref frame - // Clear down the global segmentation map - memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); - seg->update_map = 0; - seg->update_data = 0; - cpi->static_mb_pct = 0; - - // Disable segmentation and individual segment features by default - av1_disable_segmentation(seg); - av1_clearall_segfeatures(seg); - - // Scan frames from current to arf frame. - // This function re-enables segmentation if appropriate. - av1_update_mbgraph_stats(cpi); - - // If segmentation was enabled set those features needed for the - // arf itself. 
- if (seg->enabled) { - seg->update_map = 1; - seg->update_data = 1; - - qi_delta = av1_compute_qdelta(rc, rc->avg_q, rc->avg_q * 0.875, - cm->seq_params.bit_depth); - av1_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta - 2); - av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_Y_H, -2); - av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_Y_V, -2); - av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_U, -2); - av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_V, -2); - - av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_Y_H); - av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_Y_V); - av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_U); - av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_V); - - av1_enable_segfeature(seg, 1, SEG_LVL_ALT_Q); - } - } else if (seg->enabled) { - // All other frames if segmentation has been enabled - - // First normal frame in a valid gf or alt ref group - if (rc->frames_since_golden == 0) { - // Set up segment features for normal frames in an arf group - if (rc->source_alt_ref_active) { - seg->update_map = 0; - seg->update_data = 1; - - qi_delta = av1_compute_qdelta(rc, rc->avg_q, rc->avg_q * 1.125, - cm->seq_params.bit_depth); - av1_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta + 2); - av1_enable_segfeature(seg, 1, SEG_LVL_ALT_Q); - - av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_Y_H, -2); - av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_Y_V, -2); - av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_U, -2); - av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_V, -2); - - av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_Y_H); - av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_Y_V); - av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_U); - av1_enable_segfeature(seg, 1, SEG_LVL_ALT_LF_V); - - // Segment coding disabled for compred testing - if (high_q || (cpi->static_mb_pct == 100)) { - av1_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME); - av1_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME); - av1_enable_segfeature(seg, 1, SEG_LVL_SKIP); - } - } else { - // Disable segmentation and clear down features if alt ref - // is not active for this group - - 
av1_disable_segmentation(seg); - - memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); - - seg->update_map = 0; - seg->update_data = 0; - - av1_clearall_segfeatures(seg); - } - } else if (rc->is_src_frame_alt_ref) { - // Special case where we are coding over the top of a previous - // alt ref frame. - // Segment coding disabled for compred testing - - // Enable ref frame features for segment 0 as well - av1_enable_segfeature(seg, 0, SEG_LVL_REF_FRAME); - av1_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME); - - // All mbs should use ALTREF_FRAME - av1_clear_segdata(seg, 0, SEG_LVL_REF_FRAME); - av1_set_segdata(seg, 0, SEG_LVL_REF_FRAME, ALTREF_FRAME); - av1_clear_segdata(seg, 1, SEG_LVL_REF_FRAME); - av1_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME); - - // Skip all MBs if high Q (0,0 mv and skip coeffs) - if (high_q) { - av1_enable_segfeature(seg, 0, SEG_LVL_SKIP); - av1_enable_segfeature(seg, 1, SEG_LVL_SKIP); - } - // Enable data update - seg->update_data = 1; - } else { - // All other frames. - - // No updates.. leave things as they are. 
- seg->update_map = 0; - seg->update_data = 0; - } - } -} - -static void update_reference_segmentation_map(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - MB_MODE_INFO **mi_4x4_ptr = cm->mi_grid_visible; - uint8_t *cache_ptr = cm->current_frame_seg_map; - int row, col; - - for (row = 0; row < cm->mi_rows; row++) { - MB_MODE_INFO **mi_4x4 = mi_4x4_ptr; - uint8_t *cache = cache_ptr; - for (col = 0; col < cm->mi_cols; col++, mi_4x4++, cache++) - cache[0] = mi_4x4[0]->segment_id; - mi_4x4_ptr += cm->mi_stride; - cache_ptr += cm->mi_cols; - } -} - -static void alloc_raw_frame_buffers(AV1_COMP *cpi) { - AV1_COMMON *cm = &cpi->common; - const SequenceHeader *const seq_params = &cm->seq_params; - const AV1EncoderConfig *oxcf = &cpi->oxcf; - - if (!cpi->lookahead) - cpi->lookahead = - av1_lookahead_init(oxcf->width, oxcf->height, seq_params->subsampling_x, - seq_params->subsampling_y, - seq_params->use_highbitdepth, oxcf->lag_in_frames); - if (!cpi->lookahead) - aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, - "Failed to allocate lag buffers"); - - // TODO(agrange) Check if ARF is enabled and skip allocation if not. 
- if (aom_realloc_frame_buffer( - &cpi->alt_ref_buffer, oxcf->width, oxcf->height, - seq_params->subsampling_x, seq_params->subsampling_y, - seq_params->use_highbitdepth, AOM_BORDER_IN_PIXELS, - cm->byte_alignment, NULL, NULL, NULL)) - aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, - "Failed to allocate altref buffer"); -} - -static void alloc_util_frame_buffers(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - const SequenceHeader *const seq_params = &cm->seq_params; - if (aom_realloc_frame_buffer( - &cpi->last_frame_uf, cm->width, cm->height, seq_params->subsampling_x, - seq_params->subsampling_y, seq_params->use_highbitdepth, - AOM_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL)) - aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, - "Failed to allocate last frame buffer"); - - if (aom_realloc_frame_buffer( - &cpi->trial_frame_rst, cm->superres_upscaled_width, - cm->superres_upscaled_height, seq_params->subsampling_x, - seq_params->subsampling_y, seq_params->use_highbitdepth, - AOM_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL)) - aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, - "Failed to allocate trial restored frame buffer"); - - if (aom_realloc_frame_buffer( - &cpi->scaled_source, cm->width, cm->height, seq_params->subsampling_x, - seq_params->subsampling_y, seq_params->use_highbitdepth, - AOM_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL)) - aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, - "Failed to allocate scaled source buffer"); - - if (aom_realloc_frame_buffer( - &cpi->scaled_last_source, cm->width, cm->height, - seq_params->subsampling_x, seq_params->subsampling_y, - seq_params->use_highbitdepth, AOM_BORDER_IN_PIXELS, - cm->byte_alignment, NULL, NULL, NULL)) - aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, - "Failed to allocate scaled last source buffer"); -} - -static void alloc_compressor_data(AV1_COMP *cpi) { - AV1_COMMON *cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - - 
av1_alloc_context_buffers(cm, cm->width, cm->height); - - int mi_rows_aligned_to_sb = - ALIGN_POWER_OF_TWO(cm->mi_rows, cm->seq_params.mib_size_log2); - int sb_rows = mi_rows_aligned_to_sb >> cm->seq_params.mib_size_log2; - - av1_alloc_txb_buf(cpi); - - alloc_context_buffers_ext(cpi); - - aom_free(cpi->tile_tok[0][0]); - - { - unsigned int tokens = - get_token_alloc(cm->mb_rows, cm->mb_cols, MAX_SB_SIZE_LOG2, num_planes); - CHECK_MEM_ERROR(cm, cpi->tile_tok[0][0], - aom_calloc(tokens, sizeof(*cpi->tile_tok[0][0]))); - } - aom_free(cpi->tplist[0][0]); - - CHECK_MEM_ERROR(cm, cpi->tplist[0][0], - aom_calloc(sb_rows * MAX_TILE_ROWS * MAX_TILE_COLS, - sizeof(*cpi->tplist[0][0]))); - - av1_setup_pc_tree(&cpi->common, &cpi->td); -} - -void av1_new_framerate(AV1_COMP *cpi, double framerate) { - cpi->framerate = framerate < 0.1 ? 30 : framerate; - av1_rc_update_framerate(cpi, cpi->common.width, cpi->common.height); -} - -static void set_tile_info(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - int i, start_sb; - - av1_get_tile_limits(cm); - - // configure tile columns - if (cpi->oxcf.tile_width_count == 0 || cpi->oxcf.tile_height_count == 0) { - cm->uniform_tile_spacing_flag = 1; - cm->log2_tile_cols = AOMMAX(cpi->oxcf.tile_columns, cm->min_log2_tile_cols); - cm->log2_tile_cols = AOMMIN(cm->log2_tile_cols, cm->max_log2_tile_cols); - } else { - int mi_cols = ALIGN_POWER_OF_TWO(cm->mi_cols, cm->seq_params.mib_size_log2); - int sb_cols = mi_cols >> cm->seq_params.mib_size_log2; - int size_sb, j = 0; - cm->uniform_tile_spacing_flag = 0; - for (i = 0, start_sb = 0; start_sb < sb_cols && i < MAX_TILE_COLS; i++) { - cm->tile_col_start_sb[i] = start_sb; - size_sb = cpi->oxcf.tile_widths[j++]; - if (j >= cpi->oxcf.tile_width_count) j = 0; - start_sb += AOMMIN(size_sb, cm->max_tile_width_sb); - } - cm->tile_cols = i; - cm->tile_col_start_sb[i] = sb_cols; - } - av1_calculate_tile_cols(cm); - - // configure tile rows - if (cm->uniform_tile_spacing_flag) { - 
// Returns non-zero when a width x height stream at fps frames per second
// fits the limits of a level described by a reference picture size
// (lvl_width x lvl_height), a reference frame rate (lvl_fps) and a dimension
// multiplier (lvl_dim_mult).
//
// The stream fits when all of the following hold:
//   - its luma picture size does not exceed lvl_width * lvl_height,
//   - its display sample rate (picture size * fps) does not exceed the
//     level's (lvl_width * lvl_height * lvl_fps),
//   - its width  does not exceed lvl_width  * lvl_dim_mult,
//   - its height does not exceed lvl_height * lvl_dim_mult.
//
// All products are formed in 64 bits: multiplying two ints first would
// overflow for large frames (e.g. 65536 x 65536) before the result is
// widened, which is undefined behaviour and can make an oversized stream
// appear to fit a small level.
static INLINE int does_level_match(int width, int height, double fps,
                                   int lvl_width, int lvl_height,
                                   double lvl_fps, int lvl_dim_mult) {
  const int64_t lvl_luma_pels = (int64_t)lvl_width * lvl_height;
  const double lvl_display_sample_rate = lvl_luma_pels * lvl_fps;
  const int64_t luma_pels = (int64_t)width * height;
  const double display_sample_rate = luma_pels * fps;
  const int64_t max_width = (int64_t)lvl_width * lvl_dim_mult;
  const int64_t max_height = (int64_t)lvl_height * lvl_dim_mult;

  return luma_pels <= lvl_luma_pels &&
         display_sample_rate <= lvl_display_sample_rate &&
         width <= max_width && height <= max_height;
}
max display sample rates. - // Need to add checks for max bit rate, max decoded luma sample rate, header - // rate, etc. that are not covered by this function. - (void)oxcf; - BitstreamLevel bl = { 9, 3 }; - if (does_level_match(oxcf->width, oxcf->height, oxcf->init_framerate, 512, - 288, 30.0, 4)) { - bl.major = 2; - bl.minor = 0; - } else if (does_level_match(oxcf->width, oxcf->height, oxcf->init_framerate, - 704, 396, 30.0, 4)) { - bl.major = 2; - bl.minor = 1; - } else if (does_level_match(oxcf->width, oxcf->height, oxcf->init_framerate, - 1088, 612, 30.0, 4)) { - bl.major = 3; - bl.minor = 0; - } else if (does_level_match(oxcf->width, oxcf->height, oxcf->init_framerate, - 1376, 774, 30.0, 4)) { - bl.major = 3; - bl.minor = 1; - } else if (does_level_match(oxcf->width, oxcf->height, oxcf->init_framerate, - 2048, 1152, 30.0, 3)) { - bl.major = 4; - bl.minor = 0; - } else if (does_level_match(oxcf->width, oxcf->height, oxcf->init_framerate, - 2048, 1152, 60.0, 3)) { - bl.major = 4; - bl.minor = 1; - } else if (does_level_match(oxcf->width, oxcf->height, oxcf->init_framerate, - 4096, 2176, 30.0, 2)) { - bl.major = 5; - bl.minor = 0; - } else if (does_level_match(oxcf->width, oxcf->height, oxcf->init_framerate, - 4096, 2176, 60.0, 2)) { - bl.major = 5; - bl.minor = 1; - } else if (does_level_match(oxcf->width, oxcf->height, oxcf->init_framerate, - 4096, 2176, 120.0, 2)) { - bl.major = 5; - bl.minor = 2; - } else if (does_level_match(oxcf->width, oxcf->height, oxcf->init_framerate, - 8192, 4352, 30.0, 2)) { - bl.major = 6; - bl.minor = 0; - } else if (does_level_match(oxcf->width, oxcf->height, oxcf->init_framerate, - 8192, 4352, 60.0, 2)) { - bl.major = 6; - bl.minor = 1; - } else if (does_level_match(oxcf->width, oxcf->height, oxcf->init_framerate, - 8192, 4352, 120.0, 2)) { - bl.major = 6; - bl.minor = 2; - } else if (does_level_match(oxcf->width, oxcf->height, oxcf->init_framerate, - 16384, 8704, 30.0, 2)) { - bl.major = 7; - bl.minor = 0; - } else if 
(does_level_match(oxcf->width, oxcf->height, oxcf->init_framerate, - 16384, 8704, 60.0, 2)) { - bl.major = 7; - bl.minor = 1; - } else if (does_level_match(oxcf->width, oxcf->height, oxcf->init_framerate, - 16384, 8704, 120.0, 2)) { - bl.major = 7; - bl.minor = 2; - } - for (int i = 0; i < MAX_NUM_OPERATING_POINTS; ++i) { - seq->level[i] = bl; - seq->tier[i] = 0; // setting main tier by default - // Set the maximum parameters for bitrate and buffer size for this profile, - // level, and tier - cm->op_params[i].bitrate = max_level_bitrate( - cm->seq_params.profile, major_minor_to_seq_level_idx(seq->level[i]), - seq->tier[i]); - // Level with seq_level_idx = 31 returns a high "dummy" bitrate to pass the - // check - if (cm->op_params[i].bitrate == 0) - aom_internal_error( - &cm->error, AOM_CODEC_UNSUP_BITSTREAM, - "AV1 does not support this combination of profile, level, and tier."); - // Buffer size in bits/s is bitrate in bits/s * 1 s - cm->op_params[i].buffer_size = cm->op_params[i].bitrate; - } -} - -static void init_seq_coding_tools(SequenceHeader *seq, AV1_COMMON *cm, - const AV1EncoderConfig *oxcf) { - seq->still_picture = (oxcf->limit == 1); - seq->reduced_still_picture_hdr = seq->still_picture; - seq->reduced_still_picture_hdr &= !oxcf->full_still_picture_hdr; - seq->force_screen_content_tools = 2; - seq->force_integer_mv = 2; - seq->enable_order_hint = oxcf->enable_order_hint; - seq->frame_id_numbers_present_flag = oxcf->large_scale_tile; - if (seq->still_picture && seq->reduced_still_picture_hdr) { - seq->enable_order_hint = 0; - seq->frame_id_numbers_present_flag = 0; - seq->force_screen_content_tools = 2; - seq->force_integer_mv = 2; - } - seq->order_hint_bits_minus_1 = - seq->enable_order_hint ? 
DEFAULT_EXPLICIT_ORDER_HINT_BITS - 1 : -1; - - seq->enable_dual_filter = oxcf->enable_dual_filter; - seq->enable_jnt_comp = oxcf->enable_jnt_comp; - seq->enable_jnt_comp &= seq->enable_order_hint; - seq->enable_ref_frame_mvs = oxcf->enable_ref_frame_mvs; - seq->enable_ref_frame_mvs &= seq->enable_order_hint; - seq->enable_superres = oxcf->enable_superres; - seq->enable_cdef = oxcf->enable_cdef; - seq->enable_restoration = oxcf->enable_restoration; - seq->enable_warped_motion = oxcf->enable_warped_motion; - seq->enable_interintra_compound = 1; - seq->enable_masked_compound = 1; - seq->enable_intra_edge_filter = 1; - seq->enable_filter_intra = 1; - - set_bitstream_level_tier(seq, cm, oxcf); - - if (seq->operating_points_cnt_minus_1 == 0) { - seq->operating_point_idc[0] = 0; - } else { - // Set operating_point_idc[] such that for the i-th operating point the - // first (operating_points_cnt-i) spatial layers and the first temporal - // layer are decoded Note that highest quality operating point should come - // first - for (int i = 0; i < seq->operating_points_cnt_minus_1 + 1; i++) - seq->operating_point_idc[i] = - (~(~0u << (seq->operating_points_cnt_minus_1 + 1 - i)) << 8) | 1; - } -} - -static void init_config(struct AV1_COMP *cpi, AV1EncoderConfig *oxcf) { - AV1_COMMON *const cm = &cpi->common; - - cpi->oxcf = *oxcf; - cpi->framerate = oxcf->init_framerate; - - cm->seq_params.profile = oxcf->profile; - cm->seq_params.bit_depth = oxcf->bit_depth; - cm->seq_params.use_highbitdepth = oxcf->use_highbitdepth; - cm->seq_params.color_primaries = oxcf->color_primaries; - cm->seq_params.transfer_characteristics = oxcf->transfer_characteristics; - cm->seq_params.matrix_coefficients = oxcf->matrix_coefficients; - cm->seq_params.monochrome = oxcf->monochrome; - cm->seq_params.chroma_sample_position = oxcf->chroma_sample_position; - cm->seq_params.color_range = oxcf->color_range; - cm->timing_info_present = oxcf->timing_info_present; - cm->timing_info.num_units_in_display_tick 
= - oxcf->timing_info.num_units_in_display_tick; - cm->timing_info.time_scale = oxcf->timing_info.time_scale; - cm->timing_info.equal_picture_interval = - oxcf->timing_info.equal_picture_interval; - cm->timing_info.num_ticks_per_picture = - oxcf->timing_info.num_ticks_per_picture; - - cm->seq_params.display_model_info_present_flag = - oxcf->display_model_info_present_flag; - cm->seq_params.decoder_model_info_present_flag = - oxcf->decoder_model_info_present_flag; - if (oxcf->decoder_model_info_present_flag) { - // set the decoder model parameters in schedule mode - cm->buffer_model.num_units_in_decoding_tick = - oxcf->buffer_model.num_units_in_decoding_tick; - cm->buffer_removal_time_present = 1; - set_aom_dec_model_info(&cm->buffer_model); - set_dec_model_op_parameters(&cm->op_params[0]); - } else if (cm->timing_info_present && - cm->timing_info.equal_picture_interval && - !cm->seq_params.decoder_model_info_present_flag) { - // set the decoder model parameters in resource availability mode - set_resource_availability_parameters(&cm->op_params[0]); - } else { - cm->op_params[0].initial_display_delay = - 10; // Default value (not signaled) - } - - if (cm->seq_params.monochrome) { - cm->seq_params.subsampling_x = 1; - cm->seq_params.subsampling_y = 1; - } else if (cm->seq_params.color_primaries == AOM_CICP_CP_BT_709 && - cm->seq_params.transfer_characteristics == AOM_CICP_TC_SRGB && - cm->seq_params.matrix_coefficients == AOM_CICP_MC_IDENTITY) { - cm->seq_params.subsampling_x = 0; - cm->seq_params.subsampling_y = 0; - } else { - if (cm->seq_params.profile == 0) { - cm->seq_params.subsampling_x = 1; - cm->seq_params.subsampling_y = 1; - } else if (cm->seq_params.profile == 1) { - cm->seq_params.subsampling_x = 0; - cm->seq_params.subsampling_y = 0; - } else { - if (cm->seq_params.bit_depth == AOM_BITS_12) { - cm->seq_params.subsampling_x = oxcf->chroma_subsampling_x; - cm->seq_params.subsampling_y = oxcf->chroma_subsampling_y; - } else { - cm->seq_params.subsampling_x 
= 1; - cm->seq_params.subsampling_y = 0; - } - } - } - - cm->width = oxcf->width; - cm->height = oxcf->height; - set_sb_size(&cm->seq_params, - select_sb_size(cpi)); // set sb size before allocations - alloc_compressor_data(cpi); - - update_film_grain_parameters(cpi, oxcf); - - // Single thread case: use counts in common. - cpi->td.counts = &cpi->counts; - - // change includes all joint functionality - av1_change_config(cpi, oxcf); - - cpi->static_mb_pct = 0; - cpi->ref_frame_flags = 0; - - // Reset resize pending flags - cpi->resize_pending_width = 0; - cpi->resize_pending_height = 0; - - init_buffer_indices(cpi); -} - -static void set_rc_buffer_sizes(RATE_CONTROL *rc, - const AV1EncoderConfig *oxcf) { - const int64_t bandwidth = oxcf->target_bandwidth; - const int64_t starting = oxcf->starting_buffer_level_ms; - const int64_t optimal = oxcf->optimal_buffer_level_ms; - const int64_t maximum = oxcf->maximum_buffer_size_ms; - - rc->starting_buffer_level = starting * bandwidth / 1000; - rc->optimal_buffer_level = - (optimal == 0) ? bandwidth / 8 : optimal * bandwidth / 1000; - rc->maximum_buffer_size = - (maximum == 0) ? 
bandwidth / 8 : maximum * bandwidth / 1000; -} - -#define HIGHBD_BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF, JSDAF, JSVAF) \ - cpi->fn_ptr[BT].sdf = SDF; \ - cpi->fn_ptr[BT].sdaf = SDAF; \ - cpi->fn_ptr[BT].vf = VF; \ - cpi->fn_ptr[BT].svf = SVF; \ - cpi->fn_ptr[BT].svaf = SVAF; \ - cpi->fn_ptr[BT].sdx4df = SDX4DF; \ - cpi->fn_ptr[BT].jsdaf = JSDAF; \ - cpi->fn_ptr[BT].jsvaf = JSVAF; - -#define MAKE_BFP_SAD_WRAPPER(fnname) \ - static unsigned int fnname##_bits8(const uint8_t *src_ptr, \ - int source_stride, \ - const uint8_t *ref_ptr, int ref_stride) { \ - return fnname(src_ptr, source_stride, ref_ptr, ref_stride); \ - } \ - static unsigned int fnname##_bits10( \ - const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \ - int ref_stride) { \ - return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 2; \ - } \ - static unsigned int fnname##_bits12( \ - const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \ - int ref_stride) { \ - return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 4; \ - } - -#define MAKE_BFP_SADAVG_WRAPPER(fnname) \ - static unsigned int fnname##_bits8( \ - const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \ - int ref_stride, const uint8_t *second_pred) { \ - return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred); \ - } \ - static unsigned int fnname##_bits10( \ - const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \ - int ref_stride, const uint8_t *second_pred) { \ - return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred) >> \ - 2; \ - } \ - static unsigned int fnname##_bits12( \ - const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \ - int ref_stride, const uint8_t *second_pred) { \ - return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred) >> \ - 4; \ - } - -#define MAKE_BFP_SAD4D_WRAPPER(fnname) \ - static void fnname##_bits8(const uint8_t *src_ptr, int source_stride, \ - const uint8_t *const ref_ptr[], int 
ref_stride, \ - unsigned int *sad_array) { \ - fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \ - } \ - static void fnname##_bits10(const uint8_t *src_ptr, int source_stride, \ - const uint8_t *const ref_ptr[], int ref_stride, \ - unsigned int *sad_array) { \ - int i; \ - fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \ - for (i = 0; i < 4; i++) sad_array[i] >>= 2; \ - } \ - static void fnname##_bits12(const uint8_t *src_ptr, int source_stride, \ - const uint8_t *const ref_ptr[], int ref_stride, \ - unsigned int *sad_array) { \ - int i; \ - fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \ - for (i = 0; i < 4; i++) sad_array[i] >>= 4; \ - } - -#define MAKE_BFP_JSADAVG_WRAPPER(fnname) \ - static unsigned int fnname##_bits8( \ - const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \ - int ref_stride, const uint8_t *second_pred, \ - const JNT_COMP_PARAMS *jcp_param) { \ - return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred, \ - jcp_param); \ - } \ - static unsigned int fnname##_bits10( \ - const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \ - int ref_stride, const uint8_t *second_pred, \ - const JNT_COMP_PARAMS *jcp_param) { \ - return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred, \ - jcp_param) >> \ - 2; \ - } \ - static unsigned int fnname##_bits12( \ - const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \ - int ref_stride, const uint8_t *second_pred, \ - const JNT_COMP_PARAMS *jcp_param) { \ - return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred, \ - jcp_param) >> \ - 4; \ - } - -MAKE_BFP_SAD_WRAPPER(aom_highbd_sad128x128) -MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad128x128_avg) -MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad128x128x4d) -MAKE_BFP_SAD_WRAPPER(aom_highbd_sad128x64) -MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad128x64_avg) -MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad128x64x4d) -MAKE_BFP_SAD_WRAPPER(aom_highbd_sad64x128) 
-MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad64x128_avg) -MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad64x128x4d) -MAKE_BFP_SAD_WRAPPER(aom_highbd_sad32x16) -MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad32x16_avg) -MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad32x16x4d) -MAKE_BFP_SAD_WRAPPER(aom_highbd_sad16x32) -MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad16x32_avg) -MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad16x32x4d) -MAKE_BFP_SAD_WRAPPER(aom_highbd_sad64x32) -MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad64x32_avg) -MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad64x32x4d) -MAKE_BFP_SAD_WRAPPER(aom_highbd_sad32x64) -MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad32x64_avg) -MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad32x64x4d) -MAKE_BFP_SAD_WRAPPER(aom_highbd_sad32x32) -MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad32x32_avg) -MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad32x32x4d) -MAKE_BFP_SAD_WRAPPER(aom_highbd_sad64x64) -MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad64x64_avg) -MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad64x64x4d) -MAKE_BFP_SAD_WRAPPER(aom_highbd_sad16x16) -MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad16x16_avg) -MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad16x16x4d) -MAKE_BFP_SAD_WRAPPER(aom_highbd_sad16x8) -MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad16x8_avg) -MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad16x8x4d) -MAKE_BFP_SAD_WRAPPER(aom_highbd_sad8x16) -MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad8x16_avg) -MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad8x16x4d) -MAKE_BFP_SAD_WRAPPER(aom_highbd_sad8x8) -MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad8x8_avg) -MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad8x8x4d) -MAKE_BFP_SAD_WRAPPER(aom_highbd_sad8x4) -MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad8x4_avg) -MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad8x4x4d) -MAKE_BFP_SAD_WRAPPER(aom_highbd_sad4x8) -MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad4x8_avg) -MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad4x8x4d) -MAKE_BFP_SAD_WRAPPER(aom_highbd_sad4x4) -MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad4x4_avg) -MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad4x4x4d) - -MAKE_BFP_SAD_WRAPPER(aom_highbd_sad4x16) -MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad4x16_avg) 
-MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad4x16x4d) -MAKE_BFP_SAD_WRAPPER(aom_highbd_sad16x4) -MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad16x4_avg) -MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad16x4x4d) -MAKE_BFP_SAD_WRAPPER(aom_highbd_sad8x32) -MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad8x32_avg) -MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad8x32x4d) -MAKE_BFP_SAD_WRAPPER(aom_highbd_sad32x8) -MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad32x8_avg) -MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad32x8x4d) -MAKE_BFP_SAD_WRAPPER(aom_highbd_sad16x64) -MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad16x64_avg) -MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad16x64x4d) -MAKE_BFP_SAD_WRAPPER(aom_highbd_sad64x16) -MAKE_BFP_SADAVG_WRAPPER(aom_highbd_sad64x16_avg) -MAKE_BFP_SAD4D_WRAPPER(aom_highbd_sad64x16x4d) - -MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad128x128_avg) -MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad128x64_avg) -MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad64x128_avg) -MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad32x16_avg) -MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad16x32_avg) -MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad64x32_avg) -MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad32x64_avg) -MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad32x32_avg) -MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad64x64_avg) -MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad16x16_avg) -MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad16x8_avg) -MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad8x16_avg) -MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad8x8_avg) -MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad8x4_avg) -MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad4x8_avg) -MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad4x4_avg) -MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad4x16_avg) -MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad16x4_avg) -MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad8x32_avg) -MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad32x8_avg) -MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad16x64_avg) -MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_jnt_sad64x16_avg) - -#define HIGHBD_MBFP(BT, MCSDF, MCSVF) \ - 
cpi->fn_ptr[BT].msdf = MCSDF; \ - cpi->fn_ptr[BT].msvf = MCSVF; - -#define MAKE_MBFP_COMPOUND_SAD_WRAPPER(fnname) \ - static unsigned int fnname##_bits8( \ - const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \ - int ref_stride, const uint8_t *second_pred_ptr, const uint8_t *m, \ - int m_stride, int invert_mask) { \ - return fnname(src_ptr, source_stride, ref_ptr, ref_stride, \ - second_pred_ptr, m, m_stride, invert_mask); \ - } \ - static unsigned int fnname##_bits10( \ - const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \ - int ref_stride, const uint8_t *second_pred_ptr, const uint8_t *m, \ - int m_stride, int invert_mask) { \ - return fnname(src_ptr, source_stride, ref_ptr, ref_stride, \ - second_pred_ptr, m, m_stride, invert_mask) >> \ - 2; \ - } \ - static unsigned int fnname##_bits12( \ - const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \ - int ref_stride, const uint8_t *second_pred_ptr, const uint8_t *m, \ - int m_stride, int invert_mask) { \ - return fnname(src_ptr, source_stride, ref_ptr, ref_stride, \ - second_pred_ptr, m, m_stride, invert_mask) >> \ - 4; \ - } - -MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad128x128) -MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad128x64) -MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad64x128) -MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad64x64) -MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad64x32) -MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad32x64) -MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad32x32) -MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad32x16) -MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad16x32) -MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad16x16) -MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad16x8) -MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad8x16) -MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad8x8) -MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad8x4) 
-MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad4x8) -MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad4x4) -MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad4x16) -MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad16x4) -MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad8x32) -MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad32x8) -MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad16x64) -MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad64x16) - -#define HIGHBD_OBFP(BT, OSDF, OVF, OSVF) \ - cpi->fn_ptr[BT].osdf = OSDF; \ - cpi->fn_ptr[BT].ovf = OVF; \ - cpi->fn_ptr[BT].osvf = OSVF; - -#define MAKE_OBFP_SAD_WRAPPER(fnname) \ - static unsigned int fnname##_bits8(const uint8_t *ref, int ref_stride, \ - const int32_t *wsrc, \ - const int32_t *msk) { \ - return fnname(ref, ref_stride, wsrc, msk); \ - } \ - static unsigned int fnname##_bits10(const uint8_t *ref, int ref_stride, \ - const int32_t *wsrc, \ - const int32_t *msk) { \ - return fnname(ref, ref_stride, wsrc, msk) >> 2; \ - } \ - static unsigned int fnname##_bits12(const uint8_t *ref, int ref_stride, \ - const int32_t *wsrc, \ - const int32_t *msk) { \ - return fnname(ref, ref_stride, wsrc, msk) >> 4; \ - } - -MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad128x128) -MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad128x64) -MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad64x128) -MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad64x64) -MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad64x32) -MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad32x64) -MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad32x32) -MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad32x16) -MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad16x32) -MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad16x16) -MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad16x8) -MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad8x16) -MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad8x8) -MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad8x4) -MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad4x8) -MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad4x4) 
-MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad4x16) -MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad16x4) -MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad8x32) -MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad32x8) -MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad16x64) -MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad64x16) - -static void highbd_set_var_fns(AV1_COMP *const cpi) { - AV1_COMMON *const cm = &cpi->common; - if (cm->seq_params.use_highbitdepth) { - switch (cm->seq_params.bit_depth) { - case AOM_BITS_8: - HIGHBD_BFP(BLOCK_64X16, aom_highbd_sad64x16_bits8, - aom_highbd_sad64x16_avg_bits8, aom_highbd_8_variance64x16, - aom_highbd_8_sub_pixel_variance64x16, - aom_highbd_8_sub_pixel_avg_variance64x16, - aom_highbd_sad64x16x4d_bits8, - aom_highbd_jnt_sad64x16_avg_bits8, - aom_highbd_8_jnt_sub_pixel_avg_variance64x16) - - HIGHBD_BFP(BLOCK_16X64, aom_highbd_sad16x64_bits8, - aom_highbd_sad16x64_avg_bits8, aom_highbd_8_variance16x64, - aom_highbd_8_sub_pixel_variance16x64, - aom_highbd_8_sub_pixel_avg_variance16x64, - aom_highbd_sad16x64x4d_bits8, - aom_highbd_jnt_sad16x64_avg_bits8, - aom_highbd_8_jnt_sub_pixel_avg_variance16x64) - - HIGHBD_BFP( - BLOCK_32X8, aom_highbd_sad32x8_bits8, aom_highbd_sad32x8_avg_bits8, - aom_highbd_8_variance32x8, aom_highbd_8_sub_pixel_variance32x8, - aom_highbd_8_sub_pixel_avg_variance32x8, - aom_highbd_sad32x8x4d_bits8, aom_highbd_jnt_sad32x8_avg_bits8, - aom_highbd_8_jnt_sub_pixel_avg_variance32x8) - - HIGHBD_BFP( - BLOCK_8X32, aom_highbd_sad8x32_bits8, aom_highbd_sad8x32_avg_bits8, - aom_highbd_8_variance8x32, aom_highbd_8_sub_pixel_variance8x32, - aom_highbd_8_sub_pixel_avg_variance8x32, - aom_highbd_sad8x32x4d_bits8, aom_highbd_jnt_sad8x32_avg_bits8, - aom_highbd_8_jnt_sub_pixel_avg_variance8x32) - - HIGHBD_BFP( - BLOCK_16X4, aom_highbd_sad16x4_bits8, aom_highbd_sad16x4_avg_bits8, - aom_highbd_8_variance16x4, aom_highbd_8_sub_pixel_variance16x4, - aom_highbd_8_sub_pixel_avg_variance16x4, - aom_highbd_sad16x4x4d_bits8, aom_highbd_jnt_sad16x4_avg_bits8, - 
aom_highbd_8_jnt_sub_pixel_avg_variance16x4) - - HIGHBD_BFP( - BLOCK_4X16, aom_highbd_sad4x16_bits8, aom_highbd_sad4x16_avg_bits8, - aom_highbd_8_variance4x16, aom_highbd_8_sub_pixel_variance4x16, - aom_highbd_8_sub_pixel_avg_variance4x16, - aom_highbd_sad4x16x4d_bits8, aom_highbd_jnt_sad4x16_avg_bits8, - aom_highbd_8_jnt_sub_pixel_avg_variance4x16) - - HIGHBD_BFP(BLOCK_32X16, aom_highbd_sad32x16_bits8, - aom_highbd_sad32x16_avg_bits8, aom_highbd_8_variance32x16, - aom_highbd_8_sub_pixel_variance32x16, - aom_highbd_8_sub_pixel_avg_variance32x16, - aom_highbd_sad32x16x4d_bits8, - aom_highbd_jnt_sad32x16_avg_bits8, - aom_highbd_8_jnt_sub_pixel_avg_variance32x16) - - HIGHBD_BFP(BLOCK_16X32, aom_highbd_sad16x32_bits8, - aom_highbd_sad16x32_avg_bits8, aom_highbd_8_variance16x32, - aom_highbd_8_sub_pixel_variance16x32, - aom_highbd_8_sub_pixel_avg_variance16x32, - aom_highbd_sad16x32x4d_bits8, - aom_highbd_jnt_sad16x32_avg_bits8, - aom_highbd_8_jnt_sub_pixel_avg_variance16x32) - - HIGHBD_BFP(BLOCK_64X32, aom_highbd_sad64x32_bits8, - aom_highbd_sad64x32_avg_bits8, aom_highbd_8_variance64x32, - aom_highbd_8_sub_pixel_variance64x32, - aom_highbd_8_sub_pixel_avg_variance64x32, - aom_highbd_sad64x32x4d_bits8, - aom_highbd_jnt_sad64x32_avg_bits8, - aom_highbd_8_jnt_sub_pixel_avg_variance64x32) - - HIGHBD_BFP(BLOCK_32X64, aom_highbd_sad32x64_bits8, - aom_highbd_sad32x64_avg_bits8, aom_highbd_8_variance32x64, - aom_highbd_8_sub_pixel_variance32x64, - aom_highbd_8_sub_pixel_avg_variance32x64, - aom_highbd_sad32x64x4d_bits8, - aom_highbd_jnt_sad32x64_avg_bits8, - aom_highbd_8_jnt_sub_pixel_avg_variance32x64) - - HIGHBD_BFP(BLOCK_32X32, aom_highbd_sad32x32_bits8, - aom_highbd_sad32x32_avg_bits8, aom_highbd_8_variance32x32, - aom_highbd_8_sub_pixel_variance32x32, - aom_highbd_8_sub_pixel_avg_variance32x32, - aom_highbd_sad32x32x4d_bits8, - aom_highbd_jnt_sad32x32_avg_bits8, - aom_highbd_8_jnt_sub_pixel_avg_variance32x32) - - HIGHBD_BFP(BLOCK_64X64, aom_highbd_sad64x64_bits8, - 
aom_highbd_sad64x64_avg_bits8, aom_highbd_8_variance64x64, - aom_highbd_8_sub_pixel_variance64x64, - aom_highbd_8_sub_pixel_avg_variance64x64, - aom_highbd_sad64x64x4d_bits8, - aom_highbd_jnt_sad64x64_avg_bits8, - aom_highbd_8_jnt_sub_pixel_avg_variance64x64) - - HIGHBD_BFP(BLOCK_16X16, aom_highbd_sad16x16_bits8, - aom_highbd_sad16x16_avg_bits8, aom_highbd_8_variance16x16, - aom_highbd_8_sub_pixel_variance16x16, - aom_highbd_8_sub_pixel_avg_variance16x16, - aom_highbd_sad16x16x4d_bits8, - aom_highbd_jnt_sad16x16_avg_bits8, - aom_highbd_8_jnt_sub_pixel_avg_variance16x16) - - HIGHBD_BFP( - BLOCK_16X8, aom_highbd_sad16x8_bits8, aom_highbd_sad16x8_avg_bits8, - aom_highbd_8_variance16x8, aom_highbd_8_sub_pixel_variance16x8, - aom_highbd_8_sub_pixel_avg_variance16x8, - aom_highbd_sad16x8x4d_bits8, aom_highbd_jnt_sad16x8_avg_bits8, - aom_highbd_8_jnt_sub_pixel_avg_variance16x8) - - HIGHBD_BFP( - BLOCK_8X16, aom_highbd_sad8x16_bits8, aom_highbd_sad8x16_avg_bits8, - aom_highbd_8_variance8x16, aom_highbd_8_sub_pixel_variance8x16, - aom_highbd_8_sub_pixel_avg_variance8x16, - aom_highbd_sad8x16x4d_bits8, aom_highbd_jnt_sad8x16_avg_bits8, - aom_highbd_8_jnt_sub_pixel_avg_variance8x16) - - HIGHBD_BFP(BLOCK_8X8, aom_highbd_sad8x8_bits8, - aom_highbd_sad8x8_avg_bits8, aom_highbd_8_variance8x8, - aom_highbd_8_sub_pixel_variance8x8, - aom_highbd_8_sub_pixel_avg_variance8x8, - aom_highbd_sad8x8x4d_bits8, aom_highbd_jnt_sad8x8_avg_bits8, - aom_highbd_8_jnt_sub_pixel_avg_variance8x8) - - HIGHBD_BFP(BLOCK_8X4, aom_highbd_sad8x4_bits8, - aom_highbd_sad8x4_avg_bits8, aom_highbd_8_variance8x4, - aom_highbd_8_sub_pixel_variance8x4, - aom_highbd_8_sub_pixel_avg_variance8x4, - aom_highbd_sad8x4x4d_bits8, aom_highbd_jnt_sad8x4_avg_bits8, - aom_highbd_8_jnt_sub_pixel_avg_variance8x4) - - HIGHBD_BFP(BLOCK_4X8, aom_highbd_sad4x8_bits8, - aom_highbd_sad4x8_avg_bits8, aom_highbd_8_variance4x8, - aom_highbd_8_sub_pixel_variance4x8, - aom_highbd_8_sub_pixel_avg_variance4x8, - 
aom_highbd_sad4x8x4d_bits8, aom_highbd_jnt_sad4x8_avg_bits8, - aom_highbd_8_jnt_sub_pixel_avg_variance4x8) - - HIGHBD_BFP(BLOCK_4X4, aom_highbd_sad4x4_bits8, - aom_highbd_sad4x4_avg_bits8, aom_highbd_8_variance4x4, - aom_highbd_8_sub_pixel_variance4x4, - aom_highbd_8_sub_pixel_avg_variance4x4, - aom_highbd_sad4x4x4d_bits8, aom_highbd_jnt_sad4x4_avg_bits8, - aom_highbd_8_jnt_sub_pixel_avg_variance4x4) - - HIGHBD_BFP( - BLOCK_128X128, aom_highbd_sad128x128_bits8, - aom_highbd_sad128x128_avg_bits8, aom_highbd_8_variance128x128, - aom_highbd_8_sub_pixel_variance128x128, - aom_highbd_8_sub_pixel_avg_variance128x128, - aom_highbd_sad128x128x4d_bits8, aom_highbd_jnt_sad128x128_avg_bits8, - aom_highbd_8_jnt_sub_pixel_avg_variance128x128) - - HIGHBD_BFP(BLOCK_128X64, aom_highbd_sad128x64_bits8, - aom_highbd_sad128x64_avg_bits8, aom_highbd_8_variance128x64, - aom_highbd_8_sub_pixel_variance128x64, - aom_highbd_8_sub_pixel_avg_variance128x64, - aom_highbd_sad128x64x4d_bits8, - aom_highbd_jnt_sad128x64_avg_bits8, - aom_highbd_8_jnt_sub_pixel_avg_variance128x64) - - HIGHBD_BFP(BLOCK_64X128, aom_highbd_sad64x128_bits8, - aom_highbd_sad64x128_avg_bits8, aom_highbd_8_variance64x128, - aom_highbd_8_sub_pixel_variance64x128, - aom_highbd_8_sub_pixel_avg_variance64x128, - aom_highbd_sad64x128x4d_bits8, - aom_highbd_jnt_sad64x128_avg_bits8, - aom_highbd_8_jnt_sub_pixel_avg_variance64x128) - - HIGHBD_MBFP(BLOCK_128X128, aom_highbd_masked_sad128x128_bits8, - aom_highbd_8_masked_sub_pixel_variance128x128) - HIGHBD_MBFP(BLOCK_128X64, aom_highbd_masked_sad128x64_bits8, - aom_highbd_8_masked_sub_pixel_variance128x64) - HIGHBD_MBFP(BLOCK_64X128, aom_highbd_masked_sad64x128_bits8, - aom_highbd_8_masked_sub_pixel_variance64x128) - HIGHBD_MBFP(BLOCK_64X64, aom_highbd_masked_sad64x64_bits8, - aom_highbd_8_masked_sub_pixel_variance64x64) - HIGHBD_MBFP(BLOCK_64X32, aom_highbd_masked_sad64x32_bits8, - aom_highbd_8_masked_sub_pixel_variance64x32) - HIGHBD_MBFP(BLOCK_32X64, 
aom_highbd_masked_sad32x64_bits8, - aom_highbd_8_masked_sub_pixel_variance32x64) - HIGHBD_MBFP(BLOCK_32X32, aom_highbd_masked_sad32x32_bits8, - aom_highbd_8_masked_sub_pixel_variance32x32) - HIGHBD_MBFP(BLOCK_32X16, aom_highbd_masked_sad32x16_bits8, - aom_highbd_8_masked_sub_pixel_variance32x16) - HIGHBD_MBFP(BLOCK_16X32, aom_highbd_masked_sad16x32_bits8, - aom_highbd_8_masked_sub_pixel_variance16x32) - HIGHBD_MBFP(BLOCK_16X16, aom_highbd_masked_sad16x16_bits8, - aom_highbd_8_masked_sub_pixel_variance16x16) - HIGHBD_MBFP(BLOCK_8X16, aom_highbd_masked_sad8x16_bits8, - aom_highbd_8_masked_sub_pixel_variance8x16) - HIGHBD_MBFP(BLOCK_16X8, aom_highbd_masked_sad16x8_bits8, - aom_highbd_8_masked_sub_pixel_variance16x8) - HIGHBD_MBFP(BLOCK_8X8, aom_highbd_masked_sad8x8_bits8, - aom_highbd_8_masked_sub_pixel_variance8x8) - HIGHBD_MBFP(BLOCK_4X8, aom_highbd_masked_sad4x8_bits8, - aom_highbd_8_masked_sub_pixel_variance4x8) - HIGHBD_MBFP(BLOCK_8X4, aom_highbd_masked_sad8x4_bits8, - aom_highbd_8_masked_sub_pixel_variance8x4) - HIGHBD_MBFP(BLOCK_4X4, aom_highbd_masked_sad4x4_bits8, - aom_highbd_8_masked_sub_pixel_variance4x4) - HIGHBD_MBFP(BLOCK_64X16, aom_highbd_masked_sad64x16_bits8, - aom_highbd_8_masked_sub_pixel_variance64x16) - HIGHBD_MBFP(BLOCK_16X64, aom_highbd_masked_sad16x64_bits8, - aom_highbd_8_masked_sub_pixel_variance16x64) - HIGHBD_MBFP(BLOCK_32X8, aom_highbd_masked_sad32x8_bits8, - aom_highbd_8_masked_sub_pixel_variance32x8) - HIGHBD_MBFP(BLOCK_8X32, aom_highbd_masked_sad8x32_bits8, - aom_highbd_8_masked_sub_pixel_variance8x32) - HIGHBD_MBFP(BLOCK_16X4, aom_highbd_masked_sad16x4_bits8, - aom_highbd_8_masked_sub_pixel_variance16x4) - HIGHBD_MBFP(BLOCK_4X16, aom_highbd_masked_sad4x16_bits8, - aom_highbd_8_masked_sub_pixel_variance4x16) - HIGHBD_OBFP(BLOCK_128X128, aom_highbd_obmc_sad128x128_bits8, - aom_highbd_obmc_variance128x128, - aom_highbd_obmc_sub_pixel_variance128x128) - HIGHBD_OBFP(BLOCK_128X64, aom_highbd_obmc_sad128x64_bits8, - 
aom_highbd_obmc_variance128x64, - aom_highbd_obmc_sub_pixel_variance128x64) - HIGHBD_OBFP(BLOCK_64X128, aom_highbd_obmc_sad64x128_bits8, - aom_highbd_obmc_variance64x128, - aom_highbd_obmc_sub_pixel_variance64x128) - HIGHBD_OBFP(BLOCK_64X64, aom_highbd_obmc_sad64x64_bits8, - aom_highbd_obmc_variance64x64, - aom_highbd_obmc_sub_pixel_variance64x64) - HIGHBD_OBFP(BLOCK_64X32, aom_highbd_obmc_sad64x32_bits8, - aom_highbd_obmc_variance64x32, - aom_highbd_obmc_sub_pixel_variance64x32) - HIGHBD_OBFP(BLOCK_32X64, aom_highbd_obmc_sad32x64_bits8, - aom_highbd_obmc_variance32x64, - aom_highbd_obmc_sub_pixel_variance32x64) - HIGHBD_OBFP(BLOCK_32X32, aom_highbd_obmc_sad32x32_bits8, - aom_highbd_obmc_variance32x32, - aom_highbd_obmc_sub_pixel_variance32x32) - HIGHBD_OBFP(BLOCK_32X16, aom_highbd_obmc_sad32x16_bits8, - aom_highbd_obmc_variance32x16, - aom_highbd_obmc_sub_pixel_variance32x16) - HIGHBD_OBFP(BLOCK_16X32, aom_highbd_obmc_sad16x32_bits8, - aom_highbd_obmc_variance16x32, - aom_highbd_obmc_sub_pixel_variance16x32) - HIGHBD_OBFP(BLOCK_16X16, aom_highbd_obmc_sad16x16_bits8, - aom_highbd_obmc_variance16x16, - aom_highbd_obmc_sub_pixel_variance16x16) - HIGHBD_OBFP(BLOCK_8X16, aom_highbd_obmc_sad8x16_bits8, - aom_highbd_obmc_variance8x16, - aom_highbd_obmc_sub_pixel_variance8x16) - HIGHBD_OBFP(BLOCK_16X8, aom_highbd_obmc_sad16x8_bits8, - aom_highbd_obmc_variance16x8, - aom_highbd_obmc_sub_pixel_variance16x8) - HIGHBD_OBFP(BLOCK_8X8, aom_highbd_obmc_sad8x8_bits8, - aom_highbd_obmc_variance8x8, - aom_highbd_obmc_sub_pixel_variance8x8) - HIGHBD_OBFP(BLOCK_4X8, aom_highbd_obmc_sad4x8_bits8, - aom_highbd_obmc_variance4x8, - aom_highbd_obmc_sub_pixel_variance4x8) - HIGHBD_OBFP(BLOCK_8X4, aom_highbd_obmc_sad8x4_bits8, - aom_highbd_obmc_variance8x4, - aom_highbd_obmc_sub_pixel_variance8x4) - HIGHBD_OBFP(BLOCK_4X4, aom_highbd_obmc_sad4x4_bits8, - aom_highbd_obmc_variance4x4, - aom_highbd_obmc_sub_pixel_variance4x4) - HIGHBD_OBFP(BLOCK_64X16, aom_highbd_obmc_sad64x16_bits8, - 
aom_highbd_obmc_variance64x16, - aom_highbd_obmc_sub_pixel_variance64x16) - HIGHBD_OBFP(BLOCK_16X64, aom_highbd_obmc_sad16x64_bits8, - aom_highbd_obmc_variance16x64, - aom_highbd_obmc_sub_pixel_variance16x64) - HIGHBD_OBFP(BLOCK_32X8, aom_highbd_obmc_sad32x8_bits8, - aom_highbd_obmc_variance32x8, - aom_highbd_obmc_sub_pixel_variance32x8) - HIGHBD_OBFP(BLOCK_8X32, aom_highbd_obmc_sad8x32_bits8, - aom_highbd_obmc_variance8x32, - aom_highbd_obmc_sub_pixel_variance8x32) - HIGHBD_OBFP(BLOCK_16X4, aom_highbd_obmc_sad16x4_bits8, - aom_highbd_obmc_variance16x4, - aom_highbd_obmc_sub_pixel_variance16x4) - HIGHBD_OBFP(BLOCK_4X16, aom_highbd_obmc_sad4x16_bits8, - aom_highbd_obmc_variance4x16, - aom_highbd_obmc_sub_pixel_variance4x16) - break; - - case AOM_BITS_10: - HIGHBD_BFP(BLOCK_64X16, aom_highbd_sad64x16_bits10, - aom_highbd_sad64x16_avg_bits10, aom_highbd_10_variance64x16, - aom_highbd_10_sub_pixel_variance64x16, - aom_highbd_10_sub_pixel_avg_variance64x16, - aom_highbd_sad64x16x4d_bits10, - aom_highbd_jnt_sad64x16_avg_bits10, - aom_highbd_10_jnt_sub_pixel_avg_variance64x16); - - HIGHBD_BFP(BLOCK_16X64, aom_highbd_sad16x64_bits10, - aom_highbd_sad16x64_avg_bits10, aom_highbd_10_variance16x64, - aom_highbd_10_sub_pixel_variance16x64, - aom_highbd_10_sub_pixel_avg_variance16x64, - aom_highbd_sad16x64x4d_bits10, - aom_highbd_jnt_sad16x64_avg_bits10, - aom_highbd_10_jnt_sub_pixel_avg_variance16x64); - - HIGHBD_BFP(BLOCK_32X8, aom_highbd_sad32x8_bits10, - aom_highbd_sad32x8_avg_bits10, aom_highbd_10_variance32x8, - aom_highbd_10_sub_pixel_variance32x8, - aom_highbd_10_sub_pixel_avg_variance32x8, - aom_highbd_sad32x8x4d_bits10, - aom_highbd_jnt_sad32x8_avg_bits10, - aom_highbd_10_jnt_sub_pixel_avg_variance32x8); - - HIGHBD_BFP(BLOCK_8X32, aom_highbd_sad8x32_bits10, - aom_highbd_sad8x32_avg_bits10, aom_highbd_10_variance8x32, - aom_highbd_10_sub_pixel_variance8x32, - aom_highbd_10_sub_pixel_avg_variance8x32, - aom_highbd_sad8x32x4d_bits10, - aom_highbd_jnt_sad8x32_avg_bits10, 
- aom_highbd_10_jnt_sub_pixel_avg_variance8x32); - - HIGHBD_BFP(BLOCK_16X4, aom_highbd_sad16x4_bits10, - aom_highbd_sad16x4_avg_bits10, aom_highbd_10_variance16x4, - aom_highbd_10_sub_pixel_variance16x4, - aom_highbd_10_sub_pixel_avg_variance16x4, - aom_highbd_sad16x4x4d_bits10, - aom_highbd_jnt_sad16x4_avg_bits10, - aom_highbd_10_jnt_sub_pixel_avg_variance16x4); - - HIGHBD_BFP(BLOCK_4X16, aom_highbd_sad4x16_bits10, - aom_highbd_sad4x16_avg_bits10, aom_highbd_10_variance4x16, - aom_highbd_10_sub_pixel_variance4x16, - aom_highbd_10_sub_pixel_avg_variance4x16, - aom_highbd_sad4x16x4d_bits10, - aom_highbd_jnt_sad4x16_avg_bits10, - aom_highbd_10_jnt_sub_pixel_avg_variance4x16); - - HIGHBD_BFP(BLOCK_32X16, aom_highbd_sad32x16_bits10, - aom_highbd_sad32x16_avg_bits10, aom_highbd_10_variance32x16, - aom_highbd_10_sub_pixel_variance32x16, - aom_highbd_10_sub_pixel_avg_variance32x16, - aom_highbd_sad32x16x4d_bits10, - aom_highbd_jnt_sad32x16_avg_bits10, - aom_highbd_10_jnt_sub_pixel_avg_variance32x16); - - HIGHBD_BFP(BLOCK_16X32, aom_highbd_sad16x32_bits10, - aom_highbd_sad16x32_avg_bits10, aom_highbd_10_variance16x32, - aom_highbd_10_sub_pixel_variance16x32, - aom_highbd_10_sub_pixel_avg_variance16x32, - aom_highbd_sad16x32x4d_bits10, - aom_highbd_jnt_sad16x32_avg_bits10, - aom_highbd_10_jnt_sub_pixel_avg_variance16x32); - - HIGHBD_BFP(BLOCK_64X32, aom_highbd_sad64x32_bits10, - aom_highbd_sad64x32_avg_bits10, aom_highbd_10_variance64x32, - aom_highbd_10_sub_pixel_variance64x32, - aom_highbd_10_sub_pixel_avg_variance64x32, - aom_highbd_sad64x32x4d_bits10, - aom_highbd_jnt_sad64x32_avg_bits10, - aom_highbd_10_jnt_sub_pixel_avg_variance64x32); - - HIGHBD_BFP(BLOCK_32X64, aom_highbd_sad32x64_bits10, - aom_highbd_sad32x64_avg_bits10, aom_highbd_10_variance32x64, - aom_highbd_10_sub_pixel_variance32x64, - aom_highbd_10_sub_pixel_avg_variance32x64, - aom_highbd_sad32x64x4d_bits10, - aom_highbd_jnt_sad32x64_avg_bits10, - aom_highbd_10_jnt_sub_pixel_avg_variance32x64); - - 
HIGHBD_BFP(BLOCK_32X32, aom_highbd_sad32x32_bits10, - aom_highbd_sad32x32_avg_bits10, aom_highbd_10_variance32x32, - aom_highbd_10_sub_pixel_variance32x32, - aom_highbd_10_sub_pixel_avg_variance32x32, - aom_highbd_sad32x32x4d_bits10, - aom_highbd_jnt_sad32x32_avg_bits10, - aom_highbd_10_jnt_sub_pixel_avg_variance32x32); - - HIGHBD_BFP(BLOCK_64X64, aom_highbd_sad64x64_bits10, - aom_highbd_sad64x64_avg_bits10, aom_highbd_10_variance64x64, - aom_highbd_10_sub_pixel_variance64x64, - aom_highbd_10_sub_pixel_avg_variance64x64, - aom_highbd_sad64x64x4d_bits10, - aom_highbd_jnt_sad64x64_avg_bits10, - aom_highbd_10_jnt_sub_pixel_avg_variance64x64); - - HIGHBD_BFP(BLOCK_16X16, aom_highbd_sad16x16_bits10, - aom_highbd_sad16x16_avg_bits10, aom_highbd_10_variance16x16, - aom_highbd_10_sub_pixel_variance16x16, - aom_highbd_10_sub_pixel_avg_variance16x16, - aom_highbd_sad16x16x4d_bits10, - aom_highbd_jnt_sad16x16_avg_bits10, - aom_highbd_10_jnt_sub_pixel_avg_variance16x16); - - HIGHBD_BFP(BLOCK_16X8, aom_highbd_sad16x8_bits10, - aom_highbd_sad16x8_avg_bits10, aom_highbd_10_variance16x8, - aom_highbd_10_sub_pixel_variance16x8, - aom_highbd_10_sub_pixel_avg_variance16x8, - aom_highbd_sad16x8x4d_bits10, - aom_highbd_jnt_sad16x8_avg_bits10, - aom_highbd_10_jnt_sub_pixel_avg_variance16x8); - - HIGHBD_BFP(BLOCK_8X16, aom_highbd_sad8x16_bits10, - aom_highbd_sad8x16_avg_bits10, aom_highbd_10_variance8x16, - aom_highbd_10_sub_pixel_variance8x16, - aom_highbd_10_sub_pixel_avg_variance8x16, - aom_highbd_sad8x16x4d_bits10, - aom_highbd_jnt_sad8x16_avg_bits10, - aom_highbd_10_jnt_sub_pixel_avg_variance8x16); - - HIGHBD_BFP( - BLOCK_8X8, aom_highbd_sad8x8_bits10, aom_highbd_sad8x8_avg_bits10, - aom_highbd_10_variance8x8, aom_highbd_10_sub_pixel_variance8x8, - aom_highbd_10_sub_pixel_avg_variance8x8, - aom_highbd_sad8x8x4d_bits10, aom_highbd_jnt_sad8x8_avg_bits10, - aom_highbd_10_jnt_sub_pixel_avg_variance8x8); - - HIGHBD_BFP( - BLOCK_8X4, aom_highbd_sad8x4_bits10, aom_highbd_sad8x4_avg_bits10, 
- aom_highbd_10_variance8x4, aom_highbd_10_sub_pixel_variance8x4, - aom_highbd_10_sub_pixel_avg_variance8x4, - aom_highbd_sad8x4x4d_bits10, aom_highbd_jnt_sad8x4_avg_bits10, - aom_highbd_10_jnt_sub_pixel_avg_variance8x4); - - HIGHBD_BFP( - BLOCK_4X8, aom_highbd_sad4x8_bits10, aom_highbd_sad4x8_avg_bits10, - aom_highbd_10_variance4x8, aom_highbd_10_sub_pixel_variance4x8, - aom_highbd_10_sub_pixel_avg_variance4x8, - aom_highbd_sad4x8x4d_bits10, aom_highbd_jnt_sad4x8_avg_bits10, - aom_highbd_10_jnt_sub_pixel_avg_variance4x8); - - HIGHBD_BFP( - BLOCK_4X4, aom_highbd_sad4x4_bits10, aom_highbd_sad4x4_avg_bits10, - aom_highbd_10_variance4x4, aom_highbd_10_sub_pixel_variance4x4, - aom_highbd_10_sub_pixel_avg_variance4x4, - aom_highbd_sad4x4x4d_bits10, aom_highbd_jnt_sad4x4_avg_bits10, - aom_highbd_10_jnt_sub_pixel_avg_variance4x4); - - HIGHBD_BFP(BLOCK_128X128, aom_highbd_sad128x128_bits10, - aom_highbd_sad128x128_avg_bits10, - aom_highbd_10_variance128x128, - aom_highbd_10_sub_pixel_variance128x128, - aom_highbd_10_sub_pixel_avg_variance128x128, - aom_highbd_sad128x128x4d_bits10, - aom_highbd_jnt_sad128x128_avg_bits10, - aom_highbd_10_jnt_sub_pixel_avg_variance128x128); - - HIGHBD_BFP( - BLOCK_128X64, aom_highbd_sad128x64_bits10, - aom_highbd_sad128x64_avg_bits10, aom_highbd_10_variance128x64, - aom_highbd_10_sub_pixel_variance128x64, - aom_highbd_10_sub_pixel_avg_variance128x64, - aom_highbd_sad128x64x4d_bits10, aom_highbd_jnt_sad128x64_avg_bits10, - aom_highbd_10_jnt_sub_pixel_avg_variance128x64); - - HIGHBD_BFP( - BLOCK_64X128, aom_highbd_sad64x128_bits10, - aom_highbd_sad64x128_avg_bits10, aom_highbd_10_variance64x128, - aom_highbd_10_sub_pixel_variance64x128, - aom_highbd_10_sub_pixel_avg_variance64x128, - aom_highbd_sad64x128x4d_bits10, aom_highbd_jnt_sad64x128_avg_bits10, - aom_highbd_10_jnt_sub_pixel_avg_variance64x128); - - HIGHBD_MBFP(BLOCK_128X128, aom_highbd_masked_sad128x128_bits10, - aom_highbd_10_masked_sub_pixel_variance128x128) - HIGHBD_MBFP(BLOCK_128X64, 
aom_highbd_masked_sad128x64_bits10, - aom_highbd_10_masked_sub_pixel_variance128x64) - HIGHBD_MBFP(BLOCK_64X128, aom_highbd_masked_sad64x128_bits10, - aom_highbd_10_masked_sub_pixel_variance64x128) - HIGHBD_MBFP(BLOCK_64X64, aom_highbd_masked_sad64x64_bits10, - aom_highbd_10_masked_sub_pixel_variance64x64) - HIGHBD_MBFP(BLOCK_64X32, aom_highbd_masked_sad64x32_bits10, - aom_highbd_10_masked_sub_pixel_variance64x32) - HIGHBD_MBFP(BLOCK_32X64, aom_highbd_masked_sad32x64_bits10, - aom_highbd_10_masked_sub_pixel_variance32x64) - HIGHBD_MBFP(BLOCK_32X32, aom_highbd_masked_sad32x32_bits10, - aom_highbd_10_masked_sub_pixel_variance32x32) - HIGHBD_MBFP(BLOCK_32X16, aom_highbd_masked_sad32x16_bits10, - aom_highbd_10_masked_sub_pixel_variance32x16) - HIGHBD_MBFP(BLOCK_16X32, aom_highbd_masked_sad16x32_bits10, - aom_highbd_10_masked_sub_pixel_variance16x32) - HIGHBD_MBFP(BLOCK_16X16, aom_highbd_masked_sad16x16_bits10, - aom_highbd_10_masked_sub_pixel_variance16x16) - HIGHBD_MBFP(BLOCK_8X16, aom_highbd_masked_sad8x16_bits10, - aom_highbd_10_masked_sub_pixel_variance8x16) - HIGHBD_MBFP(BLOCK_16X8, aom_highbd_masked_sad16x8_bits10, - aom_highbd_10_masked_sub_pixel_variance16x8) - HIGHBD_MBFP(BLOCK_8X8, aom_highbd_masked_sad8x8_bits10, - aom_highbd_10_masked_sub_pixel_variance8x8) - HIGHBD_MBFP(BLOCK_4X8, aom_highbd_masked_sad4x8_bits10, - aom_highbd_10_masked_sub_pixel_variance4x8) - HIGHBD_MBFP(BLOCK_8X4, aom_highbd_masked_sad8x4_bits10, - aom_highbd_10_masked_sub_pixel_variance8x4) - HIGHBD_MBFP(BLOCK_4X4, aom_highbd_masked_sad4x4_bits10, - aom_highbd_10_masked_sub_pixel_variance4x4) - HIGHBD_MBFP(BLOCK_64X16, aom_highbd_masked_sad64x16_bits10, - aom_highbd_10_masked_sub_pixel_variance64x16) - HIGHBD_MBFP(BLOCK_16X64, aom_highbd_masked_sad16x64_bits10, - aom_highbd_10_masked_sub_pixel_variance16x64) - HIGHBD_MBFP(BLOCK_32X8, aom_highbd_masked_sad32x8_bits10, - aom_highbd_10_masked_sub_pixel_variance32x8) - HIGHBD_MBFP(BLOCK_8X32, aom_highbd_masked_sad8x32_bits10, - 
aom_highbd_10_masked_sub_pixel_variance8x32) - HIGHBD_MBFP(BLOCK_16X4, aom_highbd_masked_sad16x4_bits10, - aom_highbd_10_masked_sub_pixel_variance16x4) - HIGHBD_MBFP(BLOCK_4X16, aom_highbd_masked_sad4x16_bits10, - aom_highbd_10_masked_sub_pixel_variance4x16) - HIGHBD_OBFP(BLOCK_128X128, aom_highbd_obmc_sad128x128_bits10, - aom_highbd_10_obmc_variance128x128, - aom_highbd_10_obmc_sub_pixel_variance128x128) - HIGHBD_OBFP(BLOCK_128X64, aom_highbd_obmc_sad128x64_bits10, - aom_highbd_10_obmc_variance128x64, - aom_highbd_10_obmc_sub_pixel_variance128x64) - HIGHBD_OBFP(BLOCK_64X128, aom_highbd_obmc_sad64x128_bits10, - aom_highbd_10_obmc_variance64x128, - aom_highbd_10_obmc_sub_pixel_variance64x128) - HIGHBD_OBFP(BLOCK_64X64, aom_highbd_obmc_sad64x64_bits10, - aom_highbd_10_obmc_variance64x64, - aom_highbd_10_obmc_sub_pixel_variance64x64) - HIGHBD_OBFP(BLOCK_64X32, aom_highbd_obmc_sad64x32_bits10, - aom_highbd_10_obmc_variance64x32, - aom_highbd_10_obmc_sub_pixel_variance64x32) - HIGHBD_OBFP(BLOCK_32X64, aom_highbd_obmc_sad32x64_bits10, - aom_highbd_10_obmc_variance32x64, - aom_highbd_10_obmc_sub_pixel_variance32x64) - HIGHBD_OBFP(BLOCK_32X32, aom_highbd_obmc_sad32x32_bits10, - aom_highbd_10_obmc_variance32x32, - aom_highbd_10_obmc_sub_pixel_variance32x32) - HIGHBD_OBFP(BLOCK_32X16, aom_highbd_obmc_sad32x16_bits10, - aom_highbd_10_obmc_variance32x16, - aom_highbd_10_obmc_sub_pixel_variance32x16) - HIGHBD_OBFP(BLOCK_16X32, aom_highbd_obmc_sad16x32_bits10, - aom_highbd_10_obmc_variance16x32, - aom_highbd_10_obmc_sub_pixel_variance16x32) - HIGHBD_OBFP(BLOCK_16X16, aom_highbd_obmc_sad16x16_bits10, - aom_highbd_10_obmc_variance16x16, - aom_highbd_10_obmc_sub_pixel_variance16x16) - HIGHBD_OBFP(BLOCK_8X16, aom_highbd_obmc_sad8x16_bits10, - aom_highbd_10_obmc_variance8x16, - aom_highbd_10_obmc_sub_pixel_variance8x16) - HIGHBD_OBFP(BLOCK_16X8, aom_highbd_obmc_sad16x8_bits10, - aom_highbd_10_obmc_variance16x8, - aom_highbd_10_obmc_sub_pixel_variance16x8) - HIGHBD_OBFP(BLOCK_8X8, 
aom_highbd_obmc_sad8x8_bits10, - aom_highbd_10_obmc_variance8x8, - aom_highbd_10_obmc_sub_pixel_variance8x8) - HIGHBD_OBFP(BLOCK_4X8, aom_highbd_obmc_sad4x8_bits10, - aom_highbd_10_obmc_variance4x8, - aom_highbd_10_obmc_sub_pixel_variance4x8) - HIGHBD_OBFP(BLOCK_8X4, aom_highbd_obmc_sad8x4_bits10, - aom_highbd_10_obmc_variance8x4, - aom_highbd_10_obmc_sub_pixel_variance8x4) - HIGHBD_OBFP(BLOCK_4X4, aom_highbd_obmc_sad4x4_bits10, - aom_highbd_10_obmc_variance4x4, - aom_highbd_10_obmc_sub_pixel_variance4x4) - - HIGHBD_OBFP(BLOCK_64X16, aom_highbd_obmc_sad64x16_bits10, - aom_highbd_10_obmc_variance64x16, - aom_highbd_10_obmc_sub_pixel_variance64x16) - - HIGHBD_OBFP(BLOCK_16X64, aom_highbd_obmc_sad16x64_bits10, - aom_highbd_10_obmc_variance16x64, - aom_highbd_10_obmc_sub_pixel_variance16x64) - - HIGHBD_OBFP(BLOCK_32X8, aom_highbd_obmc_sad32x8_bits10, - aom_highbd_10_obmc_variance32x8, - aom_highbd_10_obmc_sub_pixel_variance32x8) - - HIGHBD_OBFP(BLOCK_8X32, aom_highbd_obmc_sad8x32_bits10, - aom_highbd_10_obmc_variance8x32, - aom_highbd_10_obmc_sub_pixel_variance8x32) - - HIGHBD_OBFP(BLOCK_16X4, aom_highbd_obmc_sad16x4_bits10, - aom_highbd_10_obmc_variance16x4, - aom_highbd_10_obmc_sub_pixel_variance16x4) - - HIGHBD_OBFP(BLOCK_4X16, aom_highbd_obmc_sad4x16_bits10, - aom_highbd_10_obmc_variance4x16, - aom_highbd_10_obmc_sub_pixel_variance4x16) - break; - - case AOM_BITS_12: - HIGHBD_BFP(BLOCK_64X16, aom_highbd_sad64x16_bits12, - aom_highbd_sad64x16_avg_bits12, aom_highbd_12_variance64x16, - aom_highbd_12_sub_pixel_variance64x16, - aom_highbd_12_sub_pixel_avg_variance64x16, - aom_highbd_sad64x16x4d_bits12, - aom_highbd_jnt_sad64x16_avg_bits12, - aom_highbd_12_jnt_sub_pixel_avg_variance64x16); - - HIGHBD_BFP(BLOCK_16X64, aom_highbd_sad16x64_bits12, - aom_highbd_sad16x64_avg_bits12, aom_highbd_12_variance16x64, - aom_highbd_12_sub_pixel_variance16x64, - aom_highbd_12_sub_pixel_avg_variance16x64, - aom_highbd_sad16x64x4d_bits12, - aom_highbd_jnt_sad16x64_avg_bits12, - 
aom_highbd_12_jnt_sub_pixel_avg_variance16x64); - - HIGHBD_BFP(BLOCK_32X8, aom_highbd_sad32x8_bits12, - aom_highbd_sad32x8_avg_bits12, aom_highbd_12_variance32x8, - aom_highbd_12_sub_pixel_variance32x8, - aom_highbd_12_sub_pixel_avg_variance32x8, - aom_highbd_sad32x8x4d_bits12, - aom_highbd_jnt_sad32x8_avg_bits12, - aom_highbd_12_jnt_sub_pixel_avg_variance32x8); - - HIGHBD_BFP(BLOCK_8X32, aom_highbd_sad8x32_bits12, - aom_highbd_sad8x32_avg_bits12, aom_highbd_12_variance8x32, - aom_highbd_12_sub_pixel_variance8x32, - aom_highbd_12_sub_pixel_avg_variance8x32, - aom_highbd_sad8x32x4d_bits12, - aom_highbd_jnt_sad8x32_avg_bits12, - aom_highbd_12_jnt_sub_pixel_avg_variance8x32); - - HIGHBD_BFP(BLOCK_16X4, aom_highbd_sad16x4_bits12, - aom_highbd_sad16x4_avg_bits12, aom_highbd_12_variance16x4, - aom_highbd_12_sub_pixel_variance16x4, - aom_highbd_12_sub_pixel_avg_variance16x4, - aom_highbd_sad16x4x4d_bits12, - aom_highbd_jnt_sad16x4_avg_bits12, - aom_highbd_12_jnt_sub_pixel_avg_variance16x4); - - HIGHBD_BFP(BLOCK_4X16, aom_highbd_sad4x16_bits12, - aom_highbd_sad4x16_avg_bits12, aom_highbd_12_variance4x16, - aom_highbd_12_sub_pixel_variance4x16, - aom_highbd_12_sub_pixel_avg_variance4x16, - aom_highbd_sad4x16x4d_bits12, - aom_highbd_jnt_sad4x16_avg_bits12, - aom_highbd_12_jnt_sub_pixel_avg_variance4x16); - - HIGHBD_BFP(BLOCK_32X16, aom_highbd_sad32x16_bits12, - aom_highbd_sad32x16_avg_bits12, aom_highbd_12_variance32x16, - aom_highbd_12_sub_pixel_variance32x16, - aom_highbd_12_sub_pixel_avg_variance32x16, - aom_highbd_sad32x16x4d_bits12, - aom_highbd_jnt_sad32x16_avg_bits12, - aom_highbd_12_jnt_sub_pixel_avg_variance32x16); - - HIGHBD_BFP(BLOCK_16X32, aom_highbd_sad16x32_bits12, - aom_highbd_sad16x32_avg_bits12, aom_highbd_12_variance16x32, - aom_highbd_12_sub_pixel_variance16x32, - aom_highbd_12_sub_pixel_avg_variance16x32, - aom_highbd_sad16x32x4d_bits12, - aom_highbd_jnt_sad16x32_avg_bits12, - aom_highbd_12_jnt_sub_pixel_avg_variance16x32); - - HIGHBD_BFP(BLOCK_64X32, 
aom_highbd_sad64x32_bits12, - aom_highbd_sad64x32_avg_bits12, aom_highbd_12_variance64x32, - aom_highbd_12_sub_pixel_variance64x32, - aom_highbd_12_sub_pixel_avg_variance64x32, - aom_highbd_sad64x32x4d_bits12, - aom_highbd_jnt_sad64x32_avg_bits12, - aom_highbd_12_jnt_sub_pixel_avg_variance64x32); - - HIGHBD_BFP(BLOCK_32X64, aom_highbd_sad32x64_bits12, - aom_highbd_sad32x64_avg_bits12, aom_highbd_12_variance32x64, - aom_highbd_12_sub_pixel_variance32x64, - aom_highbd_12_sub_pixel_avg_variance32x64, - aom_highbd_sad32x64x4d_bits12, - aom_highbd_jnt_sad32x64_avg_bits12, - aom_highbd_12_jnt_sub_pixel_avg_variance32x64); - - HIGHBD_BFP(BLOCK_32X32, aom_highbd_sad32x32_bits12, - aom_highbd_sad32x32_avg_bits12, aom_highbd_12_variance32x32, - aom_highbd_12_sub_pixel_variance32x32, - aom_highbd_12_sub_pixel_avg_variance32x32, - aom_highbd_sad32x32x4d_bits12, - aom_highbd_jnt_sad32x32_avg_bits12, - aom_highbd_12_jnt_sub_pixel_avg_variance32x32); - - HIGHBD_BFP(BLOCK_64X64, aom_highbd_sad64x64_bits12, - aom_highbd_sad64x64_avg_bits12, aom_highbd_12_variance64x64, - aom_highbd_12_sub_pixel_variance64x64, - aom_highbd_12_sub_pixel_avg_variance64x64, - aom_highbd_sad64x64x4d_bits12, - aom_highbd_jnt_sad64x64_avg_bits12, - aom_highbd_12_jnt_sub_pixel_avg_variance64x64); - - HIGHBD_BFP(BLOCK_16X16, aom_highbd_sad16x16_bits12, - aom_highbd_sad16x16_avg_bits12, aom_highbd_12_variance16x16, - aom_highbd_12_sub_pixel_variance16x16, - aom_highbd_12_sub_pixel_avg_variance16x16, - aom_highbd_sad16x16x4d_bits12, - aom_highbd_jnt_sad16x16_avg_bits12, - aom_highbd_12_jnt_sub_pixel_avg_variance16x16); - - HIGHBD_BFP(BLOCK_16X8, aom_highbd_sad16x8_bits12, - aom_highbd_sad16x8_avg_bits12, aom_highbd_12_variance16x8, - aom_highbd_12_sub_pixel_variance16x8, - aom_highbd_12_sub_pixel_avg_variance16x8, - aom_highbd_sad16x8x4d_bits12, - aom_highbd_jnt_sad16x8_avg_bits12, - aom_highbd_12_jnt_sub_pixel_avg_variance16x8); - - HIGHBD_BFP(BLOCK_8X16, aom_highbd_sad8x16_bits12, - 
aom_highbd_sad8x16_avg_bits12, aom_highbd_12_variance8x16, - aom_highbd_12_sub_pixel_variance8x16, - aom_highbd_12_sub_pixel_avg_variance8x16, - aom_highbd_sad8x16x4d_bits12, - aom_highbd_jnt_sad8x16_avg_bits12, - aom_highbd_12_jnt_sub_pixel_avg_variance8x16); - - HIGHBD_BFP( - BLOCK_8X8, aom_highbd_sad8x8_bits12, aom_highbd_sad8x8_avg_bits12, - aom_highbd_12_variance8x8, aom_highbd_12_sub_pixel_variance8x8, - aom_highbd_12_sub_pixel_avg_variance8x8, - aom_highbd_sad8x8x4d_bits12, aom_highbd_jnt_sad8x8_avg_bits12, - aom_highbd_12_jnt_sub_pixel_avg_variance8x8); - - HIGHBD_BFP( - BLOCK_8X4, aom_highbd_sad8x4_bits12, aom_highbd_sad8x4_avg_bits12, - aom_highbd_12_variance8x4, aom_highbd_12_sub_pixel_variance8x4, - aom_highbd_12_sub_pixel_avg_variance8x4, - aom_highbd_sad8x4x4d_bits12, aom_highbd_jnt_sad8x4_avg_bits12, - aom_highbd_12_jnt_sub_pixel_avg_variance8x4); - - HIGHBD_BFP( - BLOCK_4X8, aom_highbd_sad4x8_bits12, aom_highbd_sad4x8_avg_bits12, - aom_highbd_12_variance4x8, aom_highbd_12_sub_pixel_variance4x8, - aom_highbd_12_sub_pixel_avg_variance4x8, - aom_highbd_sad4x8x4d_bits12, aom_highbd_jnt_sad4x8_avg_bits12, - aom_highbd_12_jnt_sub_pixel_avg_variance4x8); - - HIGHBD_BFP( - BLOCK_4X4, aom_highbd_sad4x4_bits12, aom_highbd_sad4x4_avg_bits12, - aom_highbd_12_variance4x4, aom_highbd_12_sub_pixel_variance4x4, - aom_highbd_12_sub_pixel_avg_variance4x4, - aom_highbd_sad4x4x4d_bits12, aom_highbd_jnt_sad4x4_avg_bits12, - aom_highbd_12_jnt_sub_pixel_avg_variance4x4); - - HIGHBD_BFP(BLOCK_128X128, aom_highbd_sad128x128_bits12, - aom_highbd_sad128x128_avg_bits12, - aom_highbd_12_variance128x128, - aom_highbd_12_sub_pixel_variance128x128, - aom_highbd_12_sub_pixel_avg_variance128x128, - aom_highbd_sad128x128x4d_bits12, - aom_highbd_jnt_sad128x128_avg_bits12, - aom_highbd_12_jnt_sub_pixel_avg_variance128x128); - - HIGHBD_BFP( - BLOCK_128X64, aom_highbd_sad128x64_bits12, - aom_highbd_sad128x64_avg_bits12, aom_highbd_12_variance128x64, - 
aom_highbd_12_sub_pixel_variance128x64, - aom_highbd_12_sub_pixel_avg_variance128x64, - aom_highbd_sad128x64x4d_bits12, aom_highbd_jnt_sad128x64_avg_bits12, - aom_highbd_12_jnt_sub_pixel_avg_variance128x64); - - HIGHBD_BFP( - BLOCK_64X128, aom_highbd_sad64x128_bits12, - aom_highbd_sad64x128_avg_bits12, aom_highbd_12_variance64x128, - aom_highbd_12_sub_pixel_variance64x128, - aom_highbd_12_sub_pixel_avg_variance64x128, - aom_highbd_sad64x128x4d_bits12, aom_highbd_jnt_sad64x128_avg_bits12, - aom_highbd_12_jnt_sub_pixel_avg_variance64x128); - - HIGHBD_MBFP(BLOCK_128X128, aom_highbd_masked_sad128x128_bits12, - aom_highbd_12_masked_sub_pixel_variance128x128) - HIGHBD_MBFP(BLOCK_128X64, aom_highbd_masked_sad128x64_bits12, - aom_highbd_12_masked_sub_pixel_variance128x64) - HIGHBD_MBFP(BLOCK_64X128, aom_highbd_masked_sad64x128_bits12, - aom_highbd_12_masked_sub_pixel_variance64x128) - HIGHBD_MBFP(BLOCK_64X64, aom_highbd_masked_sad64x64_bits12, - aom_highbd_12_masked_sub_pixel_variance64x64) - HIGHBD_MBFP(BLOCK_64X32, aom_highbd_masked_sad64x32_bits12, - aom_highbd_12_masked_sub_pixel_variance64x32) - HIGHBD_MBFP(BLOCK_32X64, aom_highbd_masked_sad32x64_bits12, - aom_highbd_12_masked_sub_pixel_variance32x64) - HIGHBD_MBFP(BLOCK_32X32, aom_highbd_masked_sad32x32_bits12, - aom_highbd_12_masked_sub_pixel_variance32x32) - HIGHBD_MBFP(BLOCK_32X16, aom_highbd_masked_sad32x16_bits12, - aom_highbd_12_masked_sub_pixel_variance32x16) - HIGHBD_MBFP(BLOCK_16X32, aom_highbd_masked_sad16x32_bits12, - aom_highbd_12_masked_sub_pixel_variance16x32) - HIGHBD_MBFP(BLOCK_16X16, aom_highbd_masked_sad16x16_bits12, - aom_highbd_12_masked_sub_pixel_variance16x16) - HIGHBD_MBFP(BLOCK_8X16, aom_highbd_masked_sad8x16_bits12, - aom_highbd_12_masked_sub_pixel_variance8x16) - HIGHBD_MBFP(BLOCK_16X8, aom_highbd_masked_sad16x8_bits12, - aom_highbd_12_masked_sub_pixel_variance16x8) - HIGHBD_MBFP(BLOCK_8X8, aom_highbd_masked_sad8x8_bits12, - aom_highbd_12_masked_sub_pixel_variance8x8) - 
HIGHBD_MBFP(BLOCK_4X8, aom_highbd_masked_sad4x8_bits12, - aom_highbd_12_masked_sub_pixel_variance4x8) - HIGHBD_MBFP(BLOCK_8X4, aom_highbd_masked_sad8x4_bits12, - aom_highbd_12_masked_sub_pixel_variance8x4) - HIGHBD_MBFP(BLOCK_4X4, aom_highbd_masked_sad4x4_bits12, - aom_highbd_12_masked_sub_pixel_variance4x4) - HIGHBD_MBFP(BLOCK_64X16, aom_highbd_masked_sad64x16_bits12, - aom_highbd_12_masked_sub_pixel_variance64x16) - HIGHBD_MBFP(BLOCK_16X64, aom_highbd_masked_sad16x64_bits12, - aom_highbd_12_masked_sub_pixel_variance16x64) - HIGHBD_MBFP(BLOCK_32X8, aom_highbd_masked_sad32x8_bits12, - aom_highbd_12_masked_sub_pixel_variance32x8) - HIGHBD_MBFP(BLOCK_8X32, aom_highbd_masked_sad8x32_bits12, - aom_highbd_12_masked_sub_pixel_variance8x32) - HIGHBD_MBFP(BLOCK_16X4, aom_highbd_masked_sad16x4_bits12, - aom_highbd_12_masked_sub_pixel_variance16x4) - HIGHBD_MBFP(BLOCK_4X16, aom_highbd_masked_sad4x16_bits12, - aom_highbd_12_masked_sub_pixel_variance4x16) - HIGHBD_OBFP(BLOCK_128X128, aom_highbd_obmc_sad128x128_bits12, - aom_highbd_12_obmc_variance128x128, - aom_highbd_12_obmc_sub_pixel_variance128x128) - HIGHBD_OBFP(BLOCK_128X64, aom_highbd_obmc_sad128x64_bits12, - aom_highbd_12_obmc_variance128x64, - aom_highbd_12_obmc_sub_pixel_variance128x64) - HIGHBD_OBFP(BLOCK_64X128, aom_highbd_obmc_sad64x128_bits12, - aom_highbd_12_obmc_variance64x128, - aom_highbd_12_obmc_sub_pixel_variance64x128) - HIGHBD_OBFP(BLOCK_64X64, aom_highbd_obmc_sad64x64_bits12, - aom_highbd_12_obmc_variance64x64, - aom_highbd_12_obmc_sub_pixel_variance64x64) - HIGHBD_OBFP(BLOCK_64X32, aom_highbd_obmc_sad64x32_bits12, - aom_highbd_12_obmc_variance64x32, - aom_highbd_12_obmc_sub_pixel_variance64x32) - HIGHBD_OBFP(BLOCK_32X64, aom_highbd_obmc_sad32x64_bits12, - aom_highbd_12_obmc_variance32x64, - aom_highbd_12_obmc_sub_pixel_variance32x64) - HIGHBD_OBFP(BLOCK_32X32, aom_highbd_obmc_sad32x32_bits12, - aom_highbd_12_obmc_variance32x32, - aom_highbd_12_obmc_sub_pixel_variance32x32) - HIGHBD_OBFP(BLOCK_32X16, 
aom_highbd_obmc_sad32x16_bits12, - aom_highbd_12_obmc_variance32x16, - aom_highbd_12_obmc_sub_pixel_variance32x16) - HIGHBD_OBFP(BLOCK_16X32, aom_highbd_obmc_sad16x32_bits12, - aom_highbd_12_obmc_variance16x32, - aom_highbd_12_obmc_sub_pixel_variance16x32) - HIGHBD_OBFP(BLOCK_16X16, aom_highbd_obmc_sad16x16_bits12, - aom_highbd_12_obmc_variance16x16, - aom_highbd_12_obmc_sub_pixel_variance16x16) - HIGHBD_OBFP(BLOCK_8X16, aom_highbd_obmc_sad8x16_bits12, - aom_highbd_12_obmc_variance8x16, - aom_highbd_12_obmc_sub_pixel_variance8x16) - HIGHBD_OBFP(BLOCK_16X8, aom_highbd_obmc_sad16x8_bits12, - aom_highbd_12_obmc_variance16x8, - aom_highbd_12_obmc_sub_pixel_variance16x8) - HIGHBD_OBFP(BLOCK_8X8, aom_highbd_obmc_sad8x8_bits12, - aom_highbd_12_obmc_variance8x8, - aom_highbd_12_obmc_sub_pixel_variance8x8) - HIGHBD_OBFP(BLOCK_4X8, aom_highbd_obmc_sad4x8_bits12, - aom_highbd_12_obmc_variance4x8, - aom_highbd_12_obmc_sub_pixel_variance4x8) - HIGHBD_OBFP(BLOCK_8X4, aom_highbd_obmc_sad8x4_bits12, - aom_highbd_12_obmc_variance8x4, - aom_highbd_12_obmc_sub_pixel_variance8x4) - HIGHBD_OBFP(BLOCK_4X4, aom_highbd_obmc_sad4x4_bits12, - aom_highbd_12_obmc_variance4x4, - aom_highbd_12_obmc_sub_pixel_variance4x4) - HIGHBD_OBFP(BLOCK_64X16, aom_highbd_obmc_sad64x16_bits12, - aom_highbd_12_obmc_variance64x16, - aom_highbd_12_obmc_sub_pixel_variance64x16) - HIGHBD_OBFP(BLOCK_16X64, aom_highbd_obmc_sad16x64_bits12, - aom_highbd_12_obmc_variance16x64, - aom_highbd_12_obmc_sub_pixel_variance16x64) - HIGHBD_OBFP(BLOCK_32X8, aom_highbd_obmc_sad32x8_bits12, - aom_highbd_12_obmc_variance32x8, - aom_highbd_12_obmc_sub_pixel_variance32x8) - HIGHBD_OBFP(BLOCK_8X32, aom_highbd_obmc_sad8x32_bits12, - aom_highbd_12_obmc_variance8x32, - aom_highbd_12_obmc_sub_pixel_variance8x32) - HIGHBD_OBFP(BLOCK_16X4, aom_highbd_obmc_sad16x4_bits12, - aom_highbd_12_obmc_variance16x4, - aom_highbd_12_obmc_sub_pixel_variance16x4) - HIGHBD_OBFP(BLOCK_4X16, aom_highbd_obmc_sad4x16_bits12, - 
aom_highbd_12_obmc_variance4x16, - aom_highbd_12_obmc_sub_pixel_variance4x16) - break; - - default: - assert(0 && - "cm->seq_params.bit_depth should be AOM_BITS_8, " - "AOM_BITS_10 or AOM_BITS_12"); - } - } -} - -static void realloc_segmentation_maps(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - - // Create the encoder segmentation map and set all entries to 0 - aom_free(cpi->segmentation_map); - CHECK_MEM_ERROR(cm, cpi->segmentation_map, - aom_calloc(cm->mi_rows * cm->mi_cols, 1)); - - // Create a map used for cyclic background refresh. - if (cpi->cyclic_refresh) av1_cyclic_refresh_free(cpi->cyclic_refresh); - CHECK_MEM_ERROR(cm, cpi->cyclic_refresh, - av1_cyclic_refresh_alloc(cm->mi_rows, cm->mi_cols)); - - // Create a map used to mark inactive areas. - aom_free(cpi->active_map.map); - CHECK_MEM_ERROR(cm, cpi->active_map.map, - aom_calloc(cm->mi_rows * cm->mi_cols, 1)); -} - -void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) { - AV1_COMMON *const cm = &cpi->common; - SequenceHeader *const seq_params = &cm->seq_params; - const int num_planes = av1_num_planes(cm); - RATE_CONTROL *const rc = &cpi->rc; - MACROBLOCK *const x = &cpi->td.mb; - - if (seq_params->profile != oxcf->profile) seq_params->profile = oxcf->profile; - seq_params->bit_depth = oxcf->bit_depth; - seq_params->color_primaries = oxcf->color_primaries; - seq_params->transfer_characteristics = oxcf->transfer_characteristics; - seq_params->matrix_coefficients = oxcf->matrix_coefficients; - seq_params->monochrome = oxcf->monochrome; - seq_params->chroma_sample_position = oxcf->chroma_sample_position; - seq_params->color_range = oxcf->color_range; - - assert(IMPLIES(seq_params->profile <= PROFILE_1, - seq_params->bit_depth <= AOM_BITS_10)); - - cm->timing_info_present = oxcf->timing_info_present; - cm->timing_info.num_units_in_display_tick = - oxcf->timing_info.num_units_in_display_tick; - cm->timing_info.time_scale = oxcf->timing_info.time_scale; - 
cm->timing_info.equal_picture_interval = - oxcf->timing_info.equal_picture_interval; - cm->timing_info.num_ticks_per_picture = - oxcf->timing_info.num_ticks_per_picture; - - seq_params->display_model_info_present_flag = - oxcf->display_model_info_present_flag; - seq_params->decoder_model_info_present_flag = - oxcf->decoder_model_info_present_flag; - if (oxcf->decoder_model_info_present_flag) { - // set the decoder model parameters in schedule mode - cm->buffer_model.num_units_in_decoding_tick = - oxcf->buffer_model.num_units_in_decoding_tick; - cm->buffer_removal_time_present = 1; - set_aom_dec_model_info(&cm->buffer_model); - set_dec_model_op_parameters(&cm->op_params[0]); - } else if (cm->timing_info_present && - cm->timing_info.equal_picture_interval && - !seq_params->decoder_model_info_present_flag) { - // set the decoder model parameters in resource availability mode - set_resource_availability_parameters(&cm->op_params[0]); - } else { - cm->op_params[0].initial_display_delay = - 10; // Default value (not signaled) - } - - update_film_grain_parameters(cpi, oxcf); - - cpi->oxcf = *oxcf; - cpi->common.options = oxcf->cfg; - cpi->row_mt = oxcf->row_mt; - x->e_mbd.bd = (int)seq_params->bit_depth; - x->e_mbd.global_motion = cm->global_motion; - - if ((oxcf->pass == 0) && (oxcf->rc_mode == AOM_Q)) { - rc->baseline_gf_interval = FIXED_GF_INTERVAL; - } else { - rc->baseline_gf_interval = (MIN_GF_INTERVAL + MAX_GF_INTERVAL) / 2; - } - - cpi->refresh_last_frame = 1; - cpi->refresh_golden_frame = 0; - cpi->refresh_bwd_ref_frame = 0; - cpi->refresh_alt2_ref_frame = 0; - - cm->refresh_frame_context = (oxcf->frame_parallel_decoding_mode) - ? 
REFRESH_FRAME_CONTEXT_DISABLED - : REFRESH_FRAME_CONTEXT_BACKWARD; - if (oxcf->large_scale_tile) - cm->refresh_frame_context = REFRESH_FRAME_CONTEXT_DISABLED; - - if (x->palette_buffer == NULL) { - CHECK_MEM_ERROR(cm, x->palette_buffer, - aom_memalign(16, sizeof(*x->palette_buffer))); - } - - if (x->tmp_conv_dst == NULL) { - CHECK_MEM_ERROR( - cm, x->tmp_conv_dst, - aom_memalign(32, MAX_SB_SIZE * MAX_SB_SIZE * sizeof(*x->tmp_conv_dst))); - x->e_mbd.tmp_conv_dst = x->tmp_conv_dst; - } - for (int i = 0; i < 2; ++i) { - if (x->tmp_obmc_bufs[i] == NULL) { - CHECK_MEM_ERROR(cm, x->tmp_obmc_bufs[i], - aom_memalign(16, 2 * MAX_MB_PLANE * MAX_SB_SQUARE * - sizeof(*x->tmp_obmc_bufs[i]))); - x->e_mbd.tmp_obmc_bufs[i] = x->tmp_obmc_bufs[i]; - } - } - - av1_reset_segment_features(cm); - set_high_precision_mv(cpi, 1, 0); - - set_rc_buffer_sizes(rc, &cpi->oxcf); - - // Under a configuration change, where maximum_buffer_size may change, - // keep buffer level clipped to the maximum allowed buffer size. - rc->bits_off_target = AOMMIN(rc->bits_off_target, rc->maximum_buffer_size); - rc->buffer_level = AOMMIN(rc->buffer_level, rc->maximum_buffer_size); - - // Set up frame rate and related parameters rate control values. - av1_new_framerate(cpi, cpi->framerate); - - // Set absolute upper and lower quality limits - rc->worst_quality = cpi->oxcf.worst_allowed_q; - rc->best_quality = cpi->oxcf.best_allowed_q; - - cm->interp_filter = oxcf->large_scale_tile ? EIGHTTAP_REGULAR : SWITCHABLE; - cm->switchable_motion_mode = 1; - - if (cpi->oxcf.render_width > 0 && cpi->oxcf.render_height > 0) { - cm->render_width = cpi->oxcf.render_width; - cm->render_height = cpi->oxcf.render_height; - } else { - cm->render_width = cpi->oxcf.width; - cm->render_height = cpi->oxcf.height; - } - cm->width = cpi->oxcf.width; - cm->height = cpi->oxcf.height; - - int sb_size = seq_params->sb_size; - // Superblock size should not be updated after the first key frame. 
- if (!cpi->seq_params_locked) { - set_sb_size(&cm->seq_params, select_sb_size(cpi)); - } - - if (cpi->initial_width || sb_size != seq_params->sb_size) { - if (cm->width > cpi->initial_width || cm->height > cpi->initial_height || - seq_params->sb_size != sb_size) { - av1_free_context_buffers(cm); - av1_free_pc_tree(&cpi->td, num_planes); - alloc_compressor_data(cpi); - realloc_segmentation_maps(cpi); - cpi->initial_width = cpi->initial_height = 0; - } - } - update_frame_size(cpi); - - cpi->alt_ref_source = NULL; - rc->is_src_frame_alt_ref = 0; - - rc->is_bwd_ref_frame = 0; - rc->is_last_bipred_frame = 0; - rc->is_bipred_frame = 0; - - set_tile_info(cpi); - - cpi->ext_refresh_frame_flags_pending = 0; - cpi->ext_refresh_frame_context_pending = 0; - - highbd_set_var_fns(cpi); - - // Init sequence level coding tools - // This should not be called after the first key frame. - if (!cpi->seq_params_locked) { - seq_params->operating_points_cnt_minus_1 = - cm->number_spatial_layers > 1 ? cm->number_spatial_layers - 1 : 0; - init_seq_coding_tools(&cm->seq_params, cm, oxcf); - } -} - -AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf, - BufferPool *const pool) { - unsigned int i; - AV1_COMP *volatile const cpi = aom_memalign(32, sizeof(AV1_COMP)); - AV1_COMMON *volatile const cm = cpi != NULL ? &cpi->common : NULL; - - if (!cm) return NULL; - - av1_zero(*cpi); - - // The jmp_buf is valid only for the duration of the function that calls - // setjmp(). Therefore, this function must reset the 'setjmp' field to 0 - // before it returns. 
- if (setjmp(cm->error.jmp)) { - cm->error.setjmp = 0; - av1_remove_compressor(cpi); - return 0; - } - - cm->error.setjmp = 1; - cm->alloc_mi = enc_alloc_mi; - cm->free_mi = enc_free_mi; - cm->setup_mi = enc_setup_mi; - - CHECK_MEM_ERROR(cm, cm->fc, - (FRAME_CONTEXT *)aom_memalign(32, sizeof(*cm->fc))); - CHECK_MEM_ERROR(cm, cm->frame_contexts, - (FRAME_CONTEXT *)aom_memalign( - 32, FRAME_CONTEXTS * sizeof(*cm->frame_contexts))); - memset(cm->fc, 0, sizeof(*cm->fc)); - memset(cm->frame_contexts, 0, FRAME_CONTEXTS * sizeof(*cm->frame_contexts)); - - cpi->resize_state = 0; - cpi->resize_avg_qp = 0; - cpi->resize_buffer_underflow = 0; - - cpi->common.buffer_pool = pool; - - init_config(cpi, oxcf); - av1_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc); - - cm->current_video_frame = 0; - cpi->seq_params_locked = 0; - cpi->partition_search_skippable_frame = 0; - cpi->tile_data = NULL; - cpi->last_show_frame_buf_idx = INVALID_IDX; - - realloc_segmentation_maps(cpi); - - memset(cpi->nmv_costs, 0, sizeof(cpi->nmv_costs)); - memset(cpi->nmv_costs_hp, 0, sizeof(cpi->nmv_costs_hp)); - - for (i = 0; i < (sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0])); - i++) { - CHECK_MEM_ERROR( - cm, cpi->mbgraph_stats[i].mb_stats, - aom_calloc(cm->MBs * sizeof(*cpi->mbgraph_stats[i].mb_stats), 1)); - } - -#if CONFIG_FP_MB_STATS - cpi->use_fp_mb_stats = 0; - if (cpi->use_fp_mb_stats) { - // a place holder used to store the first pass mb stats in the first pass - CHECK_MEM_ERROR(cm, cpi->twopass.frame_mb_stats_buf, - aom_calloc(cm->MBs * sizeof(uint8_t), 1)); - } else { - cpi->twopass.frame_mb_stats_buf = NULL; - } -#endif - - cpi->refresh_alt_ref_frame = 0; - - cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS; -#if CONFIG_INTERNAL_STATS - cpi->b_calculate_blockiness = 1; - cpi->b_calculate_consistency = 1; - cpi->total_inconsistency = 0; - cpi->psnr.worst = 100.0; - cpi->worst_ssim = 100.0; - - cpi->count = 0; - cpi->bytes = 0; - - if (cpi->b_calculate_psnr) { - cpi->total_sq_error = 0; - 
cpi->total_samples = 0; - cpi->tot_recode_hits = 0; - cpi->summed_quality = 0; - cpi->summed_weights = 0; - } - - cpi->fastssim.worst = 100.0; - cpi->psnrhvs.worst = 100.0; - - if (cpi->b_calculate_blockiness) { - cpi->total_blockiness = 0; - cpi->worst_blockiness = 0.0; - } - - if (cpi->b_calculate_consistency) { - CHECK_MEM_ERROR(cm, cpi->ssim_vars, - aom_malloc(sizeof(*cpi->ssim_vars) * 4 * - cpi->common.mi_rows * cpi->common.mi_cols)); - cpi->worst_consistency = 100.0; - } -#endif -#if CONFIG_ENTROPY_STATS - av1_zero(aggregate_fc); -#endif // CONFIG_ENTROPY_STATS - - cpi->first_time_stamp_ever = INT64_MAX; - - cpi->td.mb.nmvcost[0] = &cpi->nmv_costs[0][MV_MAX]; - cpi->td.mb.nmvcost[1] = &cpi->nmv_costs[1][MV_MAX]; - cpi->td.mb.nmvcost_hp[0] = &cpi->nmv_costs_hp[0][MV_MAX]; - cpi->td.mb.nmvcost_hp[1] = &cpi->nmv_costs_hp[1][MV_MAX]; - -#ifdef OUTPUT_YUV_SKINMAP - yuv_skinmap_file = fopen("skinmap.yuv", "ab"); -#endif -#ifdef OUTPUT_YUV_REC - yuv_rec_file = fopen("rec.yuv", "wb"); -#endif - - if (oxcf->pass == 1) { - av1_init_first_pass(cpi); - } else if (oxcf->pass == 2) { - const size_t packet_sz = sizeof(FIRSTPASS_STATS); - const int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz); - -#if CONFIG_FP_MB_STATS - if (cpi->use_fp_mb_stats) { - const size_t psz = cpi->common.MBs * sizeof(uint8_t); - const int ps = (int)(oxcf->firstpass_mb_stats_in.sz / psz); - - cpi->twopass.firstpass_mb_stats.mb_stats_start = - oxcf->firstpass_mb_stats_in.buf; - cpi->twopass.firstpass_mb_stats.mb_stats_end = - cpi->twopass.firstpass_mb_stats.mb_stats_start + - (ps - 1) * cpi->common.MBs * sizeof(uint8_t); - } -#endif - - cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf; - cpi->twopass.stats_in = cpi->twopass.stats_in_start; - cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1]; - - av1_init_second_pass(cpi); - } - - CHECK_MEM_ERROR( - cm, cpi->td.mb.above_pred_buf, - (uint8_t *)aom_memalign(16, MAX_MB_PLANE * MAX_SB_SQUARE * - 
sizeof(*cpi->td.mb.above_pred_buf))); - CHECK_MEM_ERROR( - cm, cpi->td.mb.left_pred_buf, - (uint8_t *)aom_memalign(16, MAX_MB_PLANE * MAX_SB_SQUARE * - sizeof(*cpi->td.mb.left_pred_buf))); - - CHECK_MEM_ERROR(cm, cpi->td.mb.wsrc_buf, - (int32_t *)aom_memalign( - 16, MAX_SB_SQUARE * sizeof(*cpi->td.mb.wsrc_buf))); - - for (int x = 0; x < 2; x++) - for (int y = 0; y < 2; y++) - CHECK_MEM_ERROR( - cm, cpi->td.mb.hash_value_buffer[x][y], - (uint32_t *)aom_malloc(AOM_BUFFER_SIZE_FOR_BLOCK_HASH * - sizeof(*cpi->td.mb.hash_value_buffer[0][0]))); - - cpi->td.mb.g_crc_initialized = 0; - - CHECK_MEM_ERROR(cm, cpi->td.mb.mask_buf, - (int32_t *)aom_memalign( - 16, MAX_SB_SQUARE * sizeof(*cpi->td.mb.mask_buf))); - - av1_set_speed_features_framesize_independent(cpi); - av1_set_speed_features_framesize_dependent(cpi); - -#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF, JSDAF, JSVAF) \ - cpi->fn_ptr[BT].sdf = SDF; \ - cpi->fn_ptr[BT].sdaf = SDAF; \ - cpi->fn_ptr[BT].vf = VF; \ - cpi->fn_ptr[BT].svf = SVF; \ - cpi->fn_ptr[BT].svaf = SVAF; \ - cpi->fn_ptr[BT].sdx4df = SDX4DF; \ - cpi->fn_ptr[BT].jsdaf = JSDAF; \ - cpi->fn_ptr[BT].jsvaf = JSVAF; - - BFP(BLOCK_4X16, aom_sad4x16, aom_sad4x16_avg, aom_variance4x16, - aom_sub_pixel_variance4x16, aom_sub_pixel_avg_variance4x16, - aom_sad4x16x4d, aom_jnt_sad4x16_avg, aom_jnt_sub_pixel_avg_variance4x16) - - BFP(BLOCK_16X4, aom_sad16x4, aom_sad16x4_avg, aom_variance16x4, - aom_sub_pixel_variance16x4, aom_sub_pixel_avg_variance16x4, - aom_sad16x4x4d, aom_jnt_sad16x4_avg, aom_jnt_sub_pixel_avg_variance16x4) - - BFP(BLOCK_8X32, aom_sad8x32, aom_sad8x32_avg, aom_variance8x32, - aom_sub_pixel_variance8x32, aom_sub_pixel_avg_variance8x32, - aom_sad8x32x4d, aom_jnt_sad8x32_avg, aom_jnt_sub_pixel_avg_variance8x32) - - BFP(BLOCK_32X8, aom_sad32x8, aom_sad32x8_avg, aom_variance32x8, - aom_sub_pixel_variance32x8, aom_sub_pixel_avg_variance32x8, - aom_sad32x8x4d, aom_jnt_sad32x8_avg, aom_jnt_sub_pixel_avg_variance32x8) - - BFP(BLOCK_16X64, 
aom_sad16x64, aom_sad16x64_avg, aom_variance16x64, - aom_sub_pixel_variance16x64, aom_sub_pixel_avg_variance16x64, - aom_sad16x64x4d, aom_jnt_sad16x64_avg, - aom_jnt_sub_pixel_avg_variance16x64) - - BFP(BLOCK_64X16, aom_sad64x16, aom_sad64x16_avg, aom_variance64x16, - aom_sub_pixel_variance64x16, aom_sub_pixel_avg_variance64x16, - aom_sad64x16x4d, aom_jnt_sad64x16_avg, - aom_jnt_sub_pixel_avg_variance64x16) - - BFP(BLOCK_128X128, aom_sad128x128, aom_sad128x128_avg, aom_variance128x128, - aom_sub_pixel_variance128x128, aom_sub_pixel_avg_variance128x128, - aom_sad128x128x4d, aom_jnt_sad128x128_avg, - aom_jnt_sub_pixel_avg_variance128x128) - - BFP(BLOCK_128X64, aom_sad128x64, aom_sad128x64_avg, aom_variance128x64, - aom_sub_pixel_variance128x64, aom_sub_pixel_avg_variance128x64, - aom_sad128x64x4d, aom_jnt_sad128x64_avg, - aom_jnt_sub_pixel_avg_variance128x64) - - BFP(BLOCK_64X128, aom_sad64x128, aom_sad64x128_avg, aom_variance64x128, - aom_sub_pixel_variance64x128, aom_sub_pixel_avg_variance64x128, - aom_sad64x128x4d, aom_jnt_sad64x128_avg, - aom_jnt_sub_pixel_avg_variance64x128) - - BFP(BLOCK_32X16, aom_sad32x16, aom_sad32x16_avg, aom_variance32x16, - aom_sub_pixel_variance32x16, aom_sub_pixel_avg_variance32x16, - aom_sad32x16x4d, aom_jnt_sad32x16_avg, - aom_jnt_sub_pixel_avg_variance32x16) - - BFP(BLOCK_16X32, aom_sad16x32, aom_sad16x32_avg, aom_variance16x32, - aom_sub_pixel_variance16x32, aom_sub_pixel_avg_variance16x32, - aom_sad16x32x4d, aom_jnt_sad16x32_avg, - aom_jnt_sub_pixel_avg_variance16x32) - - BFP(BLOCK_64X32, aom_sad64x32, aom_sad64x32_avg, aom_variance64x32, - aom_sub_pixel_variance64x32, aom_sub_pixel_avg_variance64x32, - aom_sad64x32x4d, aom_jnt_sad64x32_avg, - aom_jnt_sub_pixel_avg_variance64x32) - - BFP(BLOCK_32X64, aom_sad32x64, aom_sad32x64_avg, aom_variance32x64, - aom_sub_pixel_variance32x64, aom_sub_pixel_avg_variance32x64, - aom_sad32x64x4d, aom_jnt_sad32x64_avg, - aom_jnt_sub_pixel_avg_variance32x64) - - BFP(BLOCK_32X32, aom_sad32x32, 
aom_sad32x32_avg, aom_variance32x32, - aom_sub_pixel_variance32x32, aom_sub_pixel_avg_variance32x32, - aom_sad32x32x4d, aom_jnt_sad32x32_avg, - aom_jnt_sub_pixel_avg_variance32x32) - - BFP(BLOCK_64X64, aom_sad64x64, aom_sad64x64_avg, aom_variance64x64, - aom_sub_pixel_variance64x64, aom_sub_pixel_avg_variance64x64, - aom_sad64x64x4d, aom_jnt_sad64x64_avg, - aom_jnt_sub_pixel_avg_variance64x64) - - BFP(BLOCK_16X16, aom_sad16x16, aom_sad16x16_avg, aom_variance16x16, - aom_sub_pixel_variance16x16, aom_sub_pixel_avg_variance16x16, - aom_sad16x16x4d, aom_jnt_sad16x16_avg, - aom_jnt_sub_pixel_avg_variance16x16) - - BFP(BLOCK_16X8, aom_sad16x8, aom_sad16x8_avg, aom_variance16x8, - aom_sub_pixel_variance16x8, aom_sub_pixel_avg_variance16x8, - aom_sad16x8x4d, aom_jnt_sad16x8_avg, aom_jnt_sub_pixel_avg_variance16x8) - - BFP(BLOCK_8X16, aom_sad8x16, aom_sad8x16_avg, aom_variance8x16, - aom_sub_pixel_variance8x16, aom_sub_pixel_avg_variance8x16, - aom_sad8x16x4d, aom_jnt_sad8x16_avg, aom_jnt_sub_pixel_avg_variance8x16) - - BFP(BLOCK_8X8, aom_sad8x8, aom_sad8x8_avg, aom_variance8x8, - aom_sub_pixel_variance8x8, aom_sub_pixel_avg_variance8x8, aom_sad8x8x4d, - aom_jnt_sad8x8_avg, aom_jnt_sub_pixel_avg_variance8x8) - - BFP(BLOCK_8X4, aom_sad8x4, aom_sad8x4_avg, aom_variance8x4, - aom_sub_pixel_variance8x4, aom_sub_pixel_avg_variance8x4, aom_sad8x4x4d, - aom_jnt_sad8x4_avg, aom_jnt_sub_pixel_avg_variance8x4) - - BFP(BLOCK_4X8, aom_sad4x8, aom_sad4x8_avg, aom_variance4x8, - aom_sub_pixel_variance4x8, aom_sub_pixel_avg_variance4x8, aom_sad4x8x4d, - aom_jnt_sad4x8_avg, aom_jnt_sub_pixel_avg_variance4x8) - - BFP(BLOCK_4X4, aom_sad4x4, aom_sad4x4_avg, aom_variance4x4, - aom_sub_pixel_variance4x4, aom_sub_pixel_avg_variance4x4, aom_sad4x4x4d, - aom_jnt_sad4x4_avg, aom_jnt_sub_pixel_avg_variance4x4) - -#define OBFP(BT, OSDF, OVF, OSVF) \ - cpi->fn_ptr[BT].osdf = OSDF; \ - cpi->fn_ptr[BT].ovf = OVF; \ - cpi->fn_ptr[BT].osvf = OSVF; - - OBFP(BLOCK_128X128, aom_obmc_sad128x128, 
aom_obmc_variance128x128, - aom_obmc_sub_pixel_variance128x128) - OBFP(BLOCK_128X64, aom_obmc_sad128x64, aom_obmc_variance128x64, - aom_obmc_sub_pixel_variance128x64) - OBFP(BLOCK_64X128, aom_obmc_sad64x128, aom_obmc_variance64x128, - aom_obmc_sub_pixel_variance64x128) - OBFP(BLOCK_64X64, aom_obmc_sad64x64, aom_obmc_variance64x64, - aom_obmc_sub_pixel_variance64x64) - OBFP(BLOCK_64X32, aom_obmc_sad64x32, aom_obmc_variance64x32, - aom_obmc_sub_pixel_variance64x32) - OBFP(BLOCK_32X64, aom_obmc_sad32x64, aom_obmc_variance32x64, - aom_obmc_sub_pixel_variance32x64) - OBFP(BLOCK_32X32, aom_obmc_sad32x32, aom_obmc_variance32x32, - aom_obmc_sub_pixel_variance32x32) - OBFP(BLOCK_32X16, aom_obmc_sad32x16, aom_obmc_variance32x16, - aom_obmc_sub_pixel_variance32x16) - OBFP(BLOCK_16X32, aom_obmc_sad16x32, aom_obmc_variance16x32, - aom_obmc_sub_pixel_variance16x32) - OBFP(BLOCK_16X16, aom_obmc_sad16x16, aom_obmc_variance16x16, - aom_obmc_sub_pixel_variance16x16) - OBFP(BLOCK_16X8, aom_obmc_sad16x8, aom_obmc_variance16x8, - aom_obmc_sub_pixel_variance16x8) - OBFP(BLOCK_8X16, aom_obmc_sad8x16, aom_obmc_variance8x16, - aom_obmc_sub_pixel_variance8x16) - OBFP(BLOCK_8X8, aom_obmc_sad8x8, aom_obmc_variance8x8, - aom_obmc_sub_pixel_variance8x8) - OBFP(BLOCK_4X8, aom_obmc_sad4x8, aom_obmc_variance4x8, - aom_obmc_sub_pixel_variance4x8) - OBFP(BLOCK_8X4, aom_obmc_sad8x4, aom_obmc_variance8x4, - aom_obmc_sub_pixel_variance8x4) - OBFP(BLOCK_4X4, aom_obmc_sad4x4, aom_obmc_variance4x4, - aom_obmc_sub_pixel_variance4x4) - OBFP(BLOCK_4X16, aom_obmc_sad4x16, aom_obmc_variance4x16, - aom_obmc_sub_pixel_variance4x16) - OBFP(BLOCK_16X4, aom_obmc_sad16x4, aom_obmc_variance16x4, - aom_obmc_sub_pixel_variance16x4) - OBFP(BLOCK_8X32, aom_obmc_sad8x32, aom_obmc_variance8x32, - aom_obmc_sub_pixel_variance8x32) - OBFP(BLOCK_32X8, aom_obmc_sad32x8, aom_obmc_variance32x8, - aom_obmc_sub_pixel_variance32x8) - OBFP(BLOCK_16X64, aom_obmc_sad16x64, aom_obmc_variance16x64, - aom_obmc_sub_pixel_variance16x64) - 
OBFP(BLOCK_64X16, aom_obmc_sad64x16, aom_obmc_variance64x16, - aom_obmc_sub_pixel_variance64x16) - -#define MBFP(BT, MCSDF, MCSVF) \ - cpi->fn_ptr[BT].msdf = MCSDF; \ - cpi->fn_ptr[BT].msvf = MCSVF; - - MBFP(BLOCK_128X128, aom_masked_sad128x128, - aom_masked_sub_pixel_variance128x128) - MBFP(BLOCK_128X64, aom_masked_sad128x64, aom_masked_sub_pixel_variance128x64) - MBFP(BLOCK_64X128, aom_masked_sad64x128, aom_masked_sub_pixel_variance64x128) - MBFP(BLOCK_64X64, aom_masked_sad64x64, aom_masked_sub_pixel_variance64x64) - MBFP(BLOCK_64X32, aom_masked_sad64x32, aom_masked_sub_pixel_variance64x32) - MBFP(BLOCK_32X64, aom_masked_sad32x64, aom_masked_sub_pixel_variance32x64) - MBFP(BLOCK_32X32, aom_masked_sad32x32, aom_masked_sub_pixel_variance32x32) - MBFP(BLOCK_32X16, aom_masked_sad32x16, aom_masked_sub_pixel_variance32x16) - MBFP(BLOCK_16X32, aom_masked_sad16x32, aom_masked_sub_pixel_variance16x32) - MBFP(BLOCK_16X16, aom_masked_sad16x16, aom_masked_sub_pixel_variance16x16) - MBFP(BLOCK_16X8, aom_masked_sad16x8, aom_masked_sub_pixel_variance16x8) - MBFP(BLOCK_8X16, aom_masked_sad8x16, aom_masked_sub_pixel_variance8x16) - MBFP(BLOCK_8X8, aom_masked_sad8x8, aom_masked_sub_pixel_variance8x8) - MBFP(BLOCK_4X8, aom_masked_sad4x8, aom_masked_sub_pixel_variance4x8) - MBFP(BLOCK_8X4, aom_masked_sad8x4, aom_masked_sub_pixel_variance8x4) - MBFP(BLOCK_4X4, aom_masked_sad4x4, aom_masked_sub_pixel_variance4x4) - - MBFP(BLOCK_4X16, aom_masked_sad4x16, aom_masked_sub_pixel_variance4x16) - - MBFP(BLOCK_16X4, aom_masked_sad16x4, aom_masked_sub_pixel_variance16x4) - - MBFP(BLOCK_8X32, aom_masked_sad8x32, aom_masked_sub_pixel_variance8x32) - - MBFP(BLOCK_32X8, aom_masked_sad32x8, aom_masked_sub_pixel_variance32x8) - - MBFP(BLOCK_16X64, aom_masked_sad16x64, aom_masked_sub_pixel_variance16x64) - - MBFP(BLOCK_64X16, aom_masked_sad64x16, aom_masked_sub_pixel_variance64x16) - - highbd_set_var_fns(cpi); - - /* av1_init_quantizer() is first called here. 
Add check in - * av1_frame_init_quantizer() so that av1_init_quantizer is only - * called later when needed. This will avoid unnecessary calls of - * av1_init_quantizer() for every frame. - */ - av1_init_quantizer(cpi); - av1_qm_init(cm); - - av1_loop_filter_init(cm); - cm->superres_scale_denominator = SCALE_NUMERATOR; - cm->superres_upscaled_width = oxcf->width; - cm->superres_upscaled_height = oxcf->height; - av1_loop_restoration_precal(); - - cm->error.setjmp = 0; - - return cpi; -} - -#if CONFIG_INTERNAL_STATS -#define SNPRINT(H, T) snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T)) - -#define SNPRINT2(H, T, V) \ - snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T), (V)) -#endif // CONFIG_INTERNAL_STATS - -void av1_remove_compressor(AV1_COMP *cpi) { - AV1_COMMON *cm; - unsigned int i; - int t; - - if (!cpi) return; - - cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - - if (cm->current_video_frame > 0) { -#if CONFIG_ENTROPY_STATS - if (cpi->oxcf.pass != 1) { - fprintf(stderr, "Writing counts.stt\n"); - FILE *f = fopen("counts.stt", "wb"); - fwrite(&aggregate_fc, sizeof(aggregate_fc), 1, f); - fclose(f); - } -#endif // CONFIG_ENTROPY_STATS -#if CONFIG_INTERNAL_STATS - aom_clear_system_state(); - - if (cpi->oxcf.pass != 1) { - char headings[512] = { 0 }; - char results[512] = { 0 }; - FILE *f = fopen("opsnr.stt", "a"); - double time_encoded = - (cpi->last_end_time_stamp_seen - cpi->first_time_stamp_ever) / - 10000000.000; - double total_encode_time = - (cpi->time_receive_data + cpi->time_compress_data) / 1000.000; - const double dr = - (double)cpi->bytes * (double)8 / (double)1000 / time_encoded; - const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1); - const double target_rate = (double)cpi->oxcf.target_bandwidth / 1000; - const double rate_err = ((100.0 * (dr - target_rate)) / target_rate); - - if (cpi->b_calculate_psnr) { - const double total_psnr = aom_sse_to_psnr( - (double)cpi->total_samples, peak, 
(double)cpi->total_sq_error); - const double total_ssim = - 100 * pow(cpi->summed_quality / cpi->summed_weights, 8.0); - snprintf(headings, sizeof(headings), - "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\t" - "AOMSSIM\tVPSSIMP\tFASTSIM\tPSNRHVS\t" - "WstPsnr\tWstSsim\tWstFast\tWstHVS\t" - "AVPsrnY\tAPsnrCb\tAPsnrCr"); - snprintf(results, sizeof(results), - "%7.2f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t" - "%7.3f\t%7.3f\t%7.3f\t%7.3f\t" - "%7.3f\t%7.3f\t%7.3f\t%7.3f\t" - "%7.3f\t%7.3f\t%7.3f", - dr, cpi->psnr.stat[STAT_ALL] / cpi->count, total_psnr, - cpi->psnr.stat[STAT_ALL] / cpi->count, total_psnr, total_ssim, - total_ssim, cpi->fastssim.stat[STAT_ALL] / cpi->count, - cpi->psnrhvs.stat[STAT_ALL] / cpi->count, cpi->psnr.worst, - cpi->worst_ssim, cpi->fastssim.worst, cpi->psnrhvs.worst, - cpi->psnr.stat[STAT_Y] / cpi->count, - cpi->psnr.stat[STAT_U] / cpi->count, - cpi->psnr.stat[STAT_V] / cpi->count); - - if (cpi->b_calculate_blockiness) { - SNPRINT(headings, "\t Block\tWstBlck"); - SNPRINT2(results, "\t%7.3f", cpi->total_blockiness / cpi->count); - SNPRINT2(results, "\t%7.3f", cpi->worst_blockiness); - } - - if (cpi->b_calculate_consistency) { - double consistency = - aom_sse_to_psnr((double)cpi->total_samples, peak, - (double)cpi->total_inconsistency); - - SNPRINT(headings, "\tConsist\tWstCons"); - SNPRINT2(results, "\t%7.3f", consistency); - SNPRINT2(results, "\t%7.3f", cpi->worst_consistency); - } - fprintf(f, "%s\t Time\tRcErr\tAbsErr\n", headings); - fprintf(f, "%s\t%8.0f\t%7.2f\t%7.2f\n", results, total_encode_time, - rate_err, fabs(rate_err)); - } - - fclose(f); - } -#endif // CONFIG_INTERNAL_STATS - } - - for (t = 0; t < cpi->num_workers; ++t) { - AVxWorker *const worker = &cpi->workers[t]; - EncWorkerData *const thread_data = &cpi->tile_thr_data[t]; - - // Deallocate allocated threads. - aom_get_worker_interface()->end(worker); - - // Deallocate allocated thread data. 
- if (t < cpi->num_workers - 1) { - aom_free(thread_data->td->palette_buffer); - aom_free(thread_data->td->tmp_conv_dst); - for (int j = 0; j < 2; ++j) { - aom_free(thread_data->td->tmp_obmc_bufs[j]); - } - aom_free(thread_data->td->above_pred_buf); - aom_free(thread_data->td->left_pred_buf); - aom_free(thread_data->td->wsrc_buf); - for (int x = 0; x < 2; x++) { - for (int y = 0; y < 2; y++) { - aom_free(thread_data->td->hash_value_buffer[x][y]); - thread_data->td->hash_value_buffer[x][y] = NULL; - } - } - aom_free(thread_data->td->mask_buf); - aom_free(thread_data->td->counts); - av1_free_pc_tree(thread_data->td, num_planes); - aom_free(thread_data->td); - } - } - aom_free(cpi->tile_thr_data); - aom_free(cpi->workers); - - if (cpi->num_workers > 1) { - av1_loop_filter_dealloc(&cpi->lf_row_sync); - av1_loop_restoration_dealloc(&cpi->lr_row_sync, cpi->num_workers); - } - - dealloc_compressor_data(cpi); - - for (i = 0; i < sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0]); - ++i) { - aom_free(cpi->mbgraph_stats[i].mb_stats); - } - -#if CONFIG_FP_MB_STATS - if (cpi->use_fp_mb_stats) { - aom_free(cpi->twopass.frame_mb_stats_buf); - cpi->twopass.frame_mb_stats_buf = NULL; - } -#endif -#if CONFIG_INTERNAL_STATS - aom_free(cpi->ssim_vars); - cpi->ssim_vars = NULL; -#endif // CONFIG_INTERNAL_STATS - - av1_remove_common(cm); - for (i = 0; i < FRAME_BUFFERS; ++i) { - av1_hash_table_destroy(&cm->buffer_pool->frame_bufs[i].hash_table); - } - if (cpi->sf.use_hash_based_trellis) hbt_destroy(); - av1_free_ref_frame_buffers(cm->buffer_pool); - aom_free(cpi); - -#ifdef OUTPUT_YUV_SKINMAP - fclose(yuv_skinmap_file); -#endif -#ifdef OUTPUT_YUV_REC - fclose(yuv_rec_file); -#endif -} - -static void generate_psnr_packet(AV1_COMP *cpi) { - struct aom_codec_cx_pkt pkt; - int i; - PSNR_STATS psnr; - aom_calc_highbd_psnr(cpi->source, cpi->common.frame_to_show, &psnr, - cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth); - - for (i = 0; i < 4; ++i) { - pkt.data.psnr.samples[i] = 
psnr.samples[i]; - pkt.data.psnr.sse[i] = psnr.sse[i]; - pkt.data.psnr.psnr[i] = psnr.psnr[i]; - } - pkt.kind = AOM_CODEC_PSNR_PKT; - aom_codec_pkt_list_add(cpi->output_pkt_list, &pkt); -} - -int av1_use_as_reference(AV1_COMP *cpi, int ref_frame_flags) { - if (ref_frame_flags > ((1 << INTER_REFS_PER_FRAME) - 1)) return -1; - - cpi->ext_ref_frame_flags = ref_frame_flags; - return 0; -} - -void av1_update_reference(AV1_COMP *cpi, int ref_frame_upd_flags) { - cpi->ext_refresh_last_frame = (ref_frame_upd_flags & AOM_LAST_FLAG) != 0; - cpi->ext_refresh_golden_frame = (ref_frame_upd_flags & AOM_GOLD_FLAG) != 0; - cpi->ext_refresh_alt_ref_frame = (ref_frame_upd_flags & AOM_ALT_FLAG) != 0; - cpi->ext_refresh_bwd_ref_frame = (ref_frame_upd_flags & AOM_BWD_FLAG) != 0; - cpi->ext_refresh_alt2_ref_frame = (ref_frame_upd_flags & AOM_ALT2_FLAG) != 0; - cpi->ext_refresh_frame_flags_pending = 1; -} - -int av1_copy_reference_enc(AV1_COMP *cpi, int idx, YV12_BUFFER_CONFIG *sd) { - AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - YV12_BUFFER_CONFIG *cfg = get_ref_frame(cm, idx); - if (cfg) { - aom_yv12_copy_frame(cfg, sd, num_planes); - return 0; - } else { - return -1; - } -} - -int av1_set_reference_enc(AV1_COMP *cpi, int idx, YV12_BUFFER_CONFIG *sd) { - AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - YV12_BUFFER_CONFIG *cfg = get_ref_frame(cm, idx); - if (cfg) { - aom_yv12_copy_frame(sd, cfg, num_planes); - return 0; - } else { - return -1; - } -} - -int av1_update_entropy(AV1_COMP *cpi, int update) { - cpi->ext_refresh_frame_context = update; - cpi->ext_refresh_frame_context_pending = 1; - return 0; -} - -#if defined(OUTPUT_YUV_DENOISED) || defined(OUTPUT_YUV_SKINMAP) -// The denoiser buffer is allocated as a YUV 440 buffer. This function writes it -// as YUV 420. We simply use the top-left pixels of the UV buffers, since we do -// not denoise the UV channels at this time. 
If ever we implement UV channel -// denoising we will have to modify this. -void aom_write_yuv_frame_420(YV12_BUFFER_CONFIG *s, FILE *f) { - uint8_t *src = s->y_buffer; - int h = s->y_height; - - do { - fwrite(src, s->y_width, 1, f); - src += s->y_stride; - } while (--h); - - src = s->u_buffer; - h = s->uv_height; - - do { - fwrite(src, s->uv_width, 1, f); - src += s->uv_stride; - } while (--h); - - src = s->v_buffer; - h = s->uv_height; - - do { - fwrite(src, s->uv_width, 1, f); - src += s->uv_stride; - } while (--h); -} -#endif - -static void check_show_existing_frame(AV1_COMP *cpi) { - const GF_GROUP *const gf_group = &cpi->twopass.gf_group; - AV1_COMMON *const cm = &cpi->common; - const FRAME_UPDATE_TYPE next_frame_update_type = - gf_group->update_type[gf_group->index]; -#if USE_SYMM_MULTI_LAYER - const int which_arf = (cpi->new_bwdref_update_rule == 1) - ? gf_group->arf_update_idx[gf_group->index] > 0 - : gf_group->arf_update_idx[gf_group->index]; -#else - const int which_arf = gf_group->arf_update_idx[gf_group->index]; -#endif - - if (cm->show_existing_frame == 1) { - cm->show_existing_frame = 0; - } else if (cpi->rc.is_last_bipred_frame) { -#if USE_SYMM_MULTI_LAYER - // NOTE: When new structure is used, every bwdref will have one overlay - // frame. Therefore, there is no need to find out which frame to - // show in advance. - if (cpi->new_bwdref_update_rule == 0) { -#endif - // NOTE: If the current frame is a last bi-predictive frame, it is - // needed next to show the BWDREF_FRAME, which is pointed by - // the last_fb_idxes[0] after reference frame buffer update - cpi->rc.is_last_bipred_frame = 0; - cm->show_existing_frame = 1; - cpi->existing_fb_idx_to_show = cpi->ref_fb_idx[0]; -#if USE_SYMM_MULTI_LAYER - } -#endif - } else if (cpi->is_arf_filter_off[which_arf] && - (next_frame_update_type == OVERLAY_UPDATE || - next_frame_update_type == INTNL_OVERLAY_UPDATE)) { -#if USE_SYMM_MULTI_LAYER - const int bwdref_to_show = - (cpi->new_bwdref_update_rule == 1) ? 
BWDREF_FRAME : ALTREF2_FRAME; -#else - const int bwdref_to_show = ALTREF2_FRAME; -#endif - // Other parameters related to OVERLAY_UPDATE will be taken care of - // in av1_rc_get_second_pass_params(cpi) - cm->show_existing_frame = 1; - cpi->rc.is_src_frame_alt_ref = 1; - cpi->existing_fb_idx_to_show = (next_frame_update_type == OVERLAY_UPDATE) - ? cpi->ref_fb_idx[ALTREF_FRAME - 1] - : cpi->ref_fb_idx[bwdref_to_show - 1]; -#if USE_SYMM_MULTI_LAYER - if (cpi->new_bwdref_update_rule == 0) -#endif - cpi->is_arf_filter_off[which_arf] = 0; - } - cpi->rc.is_src_frame_ext_arf = 0; -} - -#ifdef OUTPUT_YUV_REC -void aom_write_one_yuv_frame(AV1_COMMON *cm, YV12_BUFFER_CONFIG *s) { - uint8_t *src = s->y_buffer; - int h = cm->height; - if (yuv_rec_file == NULL) return; - if (s->flags & YV12_FLAG_HIGHBITDEPTH) { - uint16_t *src16 = CONVERT_TO_SHORTPTR(s->y_buffer); - - do { - fwrite(src16, s->y_width, 2, yuv_rec_file); - src16 += s->y_stride; - } while (--h); - - src16 = CONVERT_TO_SHORTPTR(s->u_buffer); - h = s->uv_height; - - do { - fwrite(src16, s->uv_width, 2, yuv_rec_file); - src16 += s->uv_stride; - } while (--h); - - src16 = CONVERT_TO_SHORTPTR(s->v_buffer); - h = s->uv_height; - - do { - fwrite(src16, s->uv_width, 2, yuv_rec_file); - src16 += s->uv_stride; - } while (--h); - - fflush(yuv_rec_file); - return; - } - - do { - fwrite(src, s->y_width, 1, yuv_rec_file); - src += s->y_stride; - } while (--h); - - src = s->u_buffer; - h = s->uv_height; - - do { - fwrite(src, s->uv_width, 1, yuv_rec_file); - src += s->uv_stride; - } while (--h); - - src = s->v_buffer; - h = s->uv_height; - - do { - fwrite(src, s->uv_width, 1, yuv_rec_file); - src += s->uv_stride; - } while (--h); - - fflush(yuv_rec_file); -} -#endif // OUTPUT_YUV_REC - -#define GM_RECODE_LOOP_NUM4X4_FACTOR 192 -static int recode_loop_test_global_motion(AV1_COMP *cpi) { - int i; - int recode = 0; - RD_COUNTS *const rdc = &cpi->td.rd_counts; - AV1_COMMON *const cm = &cpi->common; - for (i = LAST_FRAME; i <= 
ALTREF_FRAME; ++i) { - if (cm->global_motion[i].wmtype != IDENTITY && - rdc->global_motion_used[i] * GM_RECODE_LOOP_NUM4X4_FACTOR < - cpi->gmparams_cost[i]) { - cm->global_motion[i] = default_warp_params; - assert(cm->global_motion[i].wmtype == IDENTITY); - cpi->gmparams_cost[i] = 0; - recode = 1; - // TODO(sarahparker): The earlier condition for recoding here was: - // "recode |= (rdc->global_motion_used[i] > 0);". Can we bring something - // similar to that back to speed up global motion? - } - } - return recode; -} - -// Function to test for conditions that indicate we should loop -// back and recode a frame. -static int recode_loop_test(AV1_COMP *cpi, int high_limit, int low_limit, int q, - int maxq, int minq) { - const RATE_CONTROL *const rc = &cpi->rc; - const AV1EncoderConfig *const oxcf = &cpi->oxcf; - const int frame_is_kfgfarf = frame_is_kf_gf_arf(cpi); - int force_recode = 0; - - if ((rc->projected_frame_size >= rc->max_frame_bandwidth) || - (cpi->sf.recode_loop == ALLOW_RECODE) || - (frame_is_kfgfarf && (cpi->sf.recode_loop == ALLOW_RECODE_KFARFGF))) { - // TODO(agrange) high_limit could be greater than the scale-down threshold. - if ((rc->projected_frame_size > high_limit && q < maxq) || - (rc->projected_frame_size < low_limit && q > minq)) { - force_recode = 1; - } else if (cpi->oxcf.rc_mode == AOM_CQ) { - // Deal with frame undershoot and whether or not we are - // below the automatically set cq level. 
- if (q > oxcf->cq_level && - rc->projected_frame_size < ((rc->this_frame_target * 7) >> 3)) { - force_recode = 1; - } - } - } - return force_recode; -} - -#define DUMP_REF_FRAME_IMAGES 0 - -#if DUMP_REF_FRAME_IMAGES == 1 -static int dump_one_image(AV1_COMMON *cm, - const YV12_BUFFER_CONFIG *const ref_buf, - char *file_name) { - int h; - FILE *f_ref = NULL; - - if (ref_buf == NULL) { - printf("Frame data buffer is NULL.\n"); - return AOM_CODEC_MEM_ERROR; - } - - if ((f_ref = fopen(file_name, "wb")) == NULL) { - printf("Unable to open file %s to write.\n", file_name); - return AOM_CODEC_MEM_ERROR; - } - - // --- Y --- - for (h = 0; h < cm->height; ++h) { - fwrite(&ref_buf->y_buffer[h * ref_buf->y_stride], 1, cm->width, f_ref); - } - // --- U --- - for (h = 0; h < (cm->height >> 1); ++h) { - fwrite(&ref_buf->u_buffer[h * ref_buf->uv_stride], 1, (cm->width >> 1), - f_ref); - } - // --- V --- - for (h = 0; h < (cm->height >> 1); ++h) { - fwrite(&ref_buf->v_buffer[h * ref_buf->uv_stride], 1, (cm->width >> 1), - f_ref); - } - - fclose(f_ref); - - return AOM_CODEC_OK; -} - -static void dump_ref_frame_images(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - MV_REFERENCE_FRAME ref_frame; - - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { - char file_name[256] = ""; - snprintf(file_name, sizeof(file_name), "/tmp/enc_F%d_ref_%d.yuv", - cm->current_video_frame, ref_frame); - dump_one_image(cm, get_ref_frame_buffer(cpi, ref_frame), file_name); - } -} -#endif // DUMP_REF_FRAME_IMAGES == 1 - -// This function is used to shift the virtual indices of last reference frames -// as follows: -// LAST_FRAME -> LAST2_FRAME -> LAST3_FRAME -// when the LAST_FRAME is updated. 
-static INLINE void shift_last_ref_frames(AV1_COMP *cpi) { - // TODO(isbs): shift the scaled indices as well - int ref_frame; - for (ref_frame = LAST_REF_FRAMES - 1; ref_frame > 0; --ref_frame) { - cpi->ref_fb_idx[ref_frame] = cpi->ref_fb_idx[ref_frame - 1]; - - // [0] is allocated to the current coded frame. The statistics for the - // reference frames start at [LAST_FRAME], i.e. [1]. - if (!cpi->rc.is_src_frame_alt_ref) { - memcpy(cpi->interp_filter_selected[ref_frame + LAST_FRAME], - cpi->interp_filter_selected[ref_frame - 1 + LAST_FRAME], - sizeof(cpi->interp_filter_selected[ref_frame - 1 + LAST_FRAME])); - } - } -} - -#if USE_SYMM_MULTI_LAYER -// This function is used to shift the virtual indices of bwd reference -// frames as follows: -// BWD_REF -> ALT2_REF -> EXT_REF -// to clear a space to store the closest bwdref -static INLINE void rshift_bwd_ref_frames(AV1_COMP *cpi) { - // TODO(isbs): shift the scaled indices as well - static const int ordered_bwd[3] = { BWDREF_FRAME - 1, ALTREF2_FRAME - 1, - EXTREF_FRAME - 1 }; - - for (int i = 2; i > 0; --i) { - // [0] is allocated to the current coded frame, i.e. bwdref - memcpy( - cpi->interp_filter_selected[ordered_bwd[i] + LAST_FRAME], - cpi->interp_filter_selected[ordered_bwd[i - 1] + LAST_FRAME], - sizeof(cpi->interp_filter_selected[ordered_bwd[i - 1] + LAST_FRAME])); - - cpi->ref_fb_idx[ordered_bwd[i]] = cpi->ref_fb_idx[ordered_bwd[i - 1]]; - } -} - -// This function is used to shift the virtual indices of bwd reference -// frames as follows: -// BWD_REF <- ALT2_REF <- EXT_REF -// to update the bwd reference frame for coding the next frame. -static INLINE void lshift_bwd_ref_frames(AV1_COMP *cpi) { - // TODO(isbs): shift the scaled indices as well - static const int ordered_bwd[3] = { BWDREF_FRAME - 1, ALTREF2_FRAME - 1, - EXTREF_FRAME - 1 }; - - for (int i = 0; i < 2; ++i) { - // [0] is allocated to the current coded frame, i.e. 
bwdref - memcpy( - cpi->interp_filter_selected[ordered_bwd[i] + LAST_FRAME], - cpi->interp_filter_selected[ordered_bwd[i + 1] + LAST_FRAME], - sizeof(cpi->interp_filter_selected[ordered_bwd[i + 1] + LAST_FRAME])); - - cpi->ref_fb_idx[ordered_bwd[i]] = cpi->ref_fb_idx[ordered_bwd[i + 1]]; - } -} -#endif // USE_SYMM_MULTI_LAYER - -static void update_reference_frames(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - - // NOTE: Save the new show frame buffer index for --test-code=warn, i.e., - // for the purpose to verify no mismatch between encoder and decoder. - if (cm->show_frame) cpi->last_show_frame_buf_idx = cm->new_fb_idx; - - // In the case of show_existing frame, we will not send fresh flag - // to decoder. Any change in the reference frame buffer can be done by - // switching the virtual indices. - if (cm->show_existing_frame) { - cpi->refresh_last_frame = 0; - cpi->refresh_golden_frame = 0; - cpi->refresh_bwd_ref_frame = 0; - cpi->refresh_alt2_ref_frame = 0; - cpi->refresh_alt_ref_frame = 0; - - cpi->rc.is_bwd_ref_frame = 0; - cpi->rc.is_last_bipred_frame = 0; - cpi->rc.is_bipred_frame = 0; - } - - BufferPool *const pool = cm->buffer_pool; - - // At this point the new frame has been encoded. - // If any buffer copy / swapping is signaled it should be done here. - - // Only update all of the reference buffers if a KEY_FRAME is also a - // show_frame. This ensures a fwd keyframe does not update all of the buffers - if ((cm->frame_type == KEY_FRAME && cm->show_frame) || frame_is_sframe(cm)) { - for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) { - ref_cnt_fb(pool->frame_bufs, - &cm->ref_frame_map[cpi->ref_fb_idx[ref_frame]], - cm->new_fb_idx); - } - return; - } - - if (av1_preserve_existing_gf(cpi)) { - // We have decided to preserve the previously existing golden frame as our - // new ARF frame. 
However, in the short term in function - // av1_bitstream.c::get_refresh_mask() we left it in the GF slot and, if - // we're updating the GF with the current decoded frame, we save it to the - // ARF slot instead. - // We now have to update the ARF with the current frame and swap gld_fb_idx - // and alt_fb_idx so that, overall, we've stored the old GF in the new ARF - // slot and, if we're updating the GF, the current frame becomes the new GF. - int tmp; - - // ARF in general is a better reference than overlay. We shouldkeep ARF as - // reference instead of replacing it with overlay. - - if (!cpi->preserve_arf_as_gld) { - ref_cnt_fb(pool->frame_bufs, - &cm->ref_frame_map[cpi->ref_fb_idx[ALTREF_FRAME - 1]], - cm->new_fb_idx); - } - - tmp = cpi->ref_fb_idx[ALTREF_FRAME - 1]; - cpi->ref_fb_idx[ALTREF_FRAME - 1] = cpi->ref_fb_idx[GOLDEN_FRAME - 1]; - cpi->ref_fb_idx[GOLDEN_FRAME - 1] = tmp; - - // TODO(zoeliu): Do we need to copy cpi->interp_filter_selected[0] over to - // cpi->interp_filter_selected[GOLDEN_FRAME]? - } else if (cpi->rc.is_src_frame_ext_arf && cm->show_existing_frame) { -#if CONFIG_DEBUG - const GF_GROUP *const gf_group = &cpi->twopass.gf_group; - assert(gf_group->update_type[gf_group->index] == INTNL_OVERLAY_UPDATE); -#endif -#if USE_SYMM_MULTI_LAYER - const int bwdref_to_show = - (cpi->new_bwdref_update_rule == 1) ? BWDREF_FRAME : ALTREF2_FRAME; -#else - const int bwdref_to_show = ALTREF2_FRAME; -#endif - // Deal with the special case for showing existing internal ALTREF_FRAME - // Refresh the LAST_FRAME with the ALTREF_FRAME and retire the LAST3_FRAME - // by updating the virtual indices. 
- const int tmp = cpi->ref_fb_idx[LAST_REF_FRAMES - 1]; - shift_last_ref_frames(cpi); - - cpi->ref_fb_idx[LAST_FRAME - 1] = cpi->ref_fb_idx[bwdref_to_show - 1]; - - memcpy(cpi->interp_filter_selected[LAST_FRAME], - cpi->interp_filter_selected[bwdref_to_show], - sizeof(cpi->interp_filter_selected[bwdref_to_show])); -#if USE_SYMM_MULTI_LAYER - if (cpi->new_bwdref_update_rule == 1) { - lshift_bwd_ref_frames(cpi); - // pass outdated forward reference frame (previous LAST3) to the - // spared space - cpi->ref_fb_idx[EXTREF_FRAME - 1] = tmp; - } else { -#endif - cpi->ref_fb_idx[bwdref_to_show - 1] = tmp; -#if USE_SYMM_MULTI_LAYER - } -#endif - } else { /* For non key/golden frames */ - // === ALTREF_FRAME === - if (cpi->refresh_alt_ref_frame) { - int arf_idx = cpi->ref_fb_idx[ALTREF_FRAME - 1]; - ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[arf_idx], cm->new_fb_idx); - - memcpy(cpi->interp_filter_selected[ALTREF_FRAME], - cpi->interp_filter_selected[0], - sizeof(cpi->interp_filter_selected[0])); - } - - // === GOLDEN_FRAME === - if (cpi->refresh_golden_frame) { - ref_cnt_fb(pool->frame_bufs, - &cm->ref_frame_map[cpi->ref_fb_idx[GOLDEN_FRAME - 1]], - cm->new_fb_idx); - - memcpy(cpi->interp_filter_selected[GOLDEN_FRAME], - cpi->interp_filter_selected[0], - sizeof(cpi->interp_filter_selected[0])); - } - - // === BWDREF_FRAME === - if (cpi->refresh_bwd_ref_frame) { -#if USE_SYMM_MULTI_LAYER - if (cpi->new_bwdref_update_rule) { - // We shift the backward reference frame as follows: - // BWDREF -> ALTREF2 -> EXTREF - // and assign the newly coded frame to BWDREF so that it always - // keeps the nearest future frame - int tmp = cpi->ref_fb_idx[EXTREF_FRAME - 1]; - ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[tmp], cm->new_fb_idx); - - rshift_bwd_ref_frames(cpi); - cpi->ref_fb_idx[BWDREF_FRAME - 1] = tmp; - } else { -#endif // USE_SYMM_MULTI_LAYER - ref_cnt_fb(pool->frame_bufs, - &cm->ref_frame_map[cpi->ref_fb_idx[BWDREF_FRAME - 1]], - cm->new_fb_idx); -#if 
USE_SYMM_MULTI_LAYER - } -#endif - memcpy(cpi->interp_filter_selected[BWDREF_FRAME], - cpi->interp_filter_selected[0], - sizeof(cpi->interp_filter_selected[0])); - } - - // === ALTREF2_FRAME === - if (cpi->refresh_alt2_ref_frame) { - ref_cnt_fb(pool->frame_bufs, - &cm->ref_frame_map[cpi->ref_fb_idx[ALTREF2_FRAME - 1]], - cm->new_fb_idx); - - memcpy(cpi->interp_filter_selected[ALTREF2_FRAME], - cpi->interp_filter_selected[0], - sizeof(cpi->interp_filter_selected[0])); - } - } - - if (cpi->refresh_last_frame) { - // NOTE(zoeliu): We have two layers of mapping (1) from the per-frame - // reference to the reference frame buffer virtual index; and then (2) from - // the virtual index to the reference frame buffer physical index: - // - // LAST_FRAME, ..., LAST3_FRAME, ..., ALTREF_FRAME - // | | | - // v v v - // ref_fb_idx[0], ..., ref_fb_idx[2], ..., ref_fb_idx[ALTREF_FRAME-1] - // | | | - // v v v - // ref_frame_map[], ..., ref_frame_map[], ..., ref_frame_map[] - // - // When refresh_last_frame is set, it is intended to retire LAST3_FRAME, - // have the other 2 LAST reference frames shifted as follows: - // LAST_FRAME -> LAST2_FRAME -> LAST3_FRAME - // , and then have LAST_FRAME refreshed by the newly coded frame. - // - // To fulfill it, the decoder will be notified to execute following 2 steps: - // - // (a) To change ref_frame_map[] and have the virtual index of LAST3_FRAME - // to point to the newly coded frame, i.e. 
- // ref_frame_map[lst_fb_idexes[2]] => new_fb_idx; - // - // (b) To change the 1st layer mapping to have LAST_FRAME mapped to the - // original virtual index of LAST3_FRAME and have the other mappings - // shifted as follows: - // LAST_FRAME, LAST2_FRAME, LAST3_FRAME - // | | | - // v v v - // ref_fb_idx[2], ref_fb_idx[0], ref_fb_idx[1] - int tmp; - - ref_cnt_fb(pool->frame_bufs, - &cm->ref_frame_map[cpi->ref_fb_idx[LAST_REF_FRAMES - 1]], - cm->new_fb_idx); - - tmp = cpi->ref_fb_idx[LAST_REF_FRAMES - 1]; - - shift_last_ref_frames(cpi); - cpi->ref_fb_idx[0] = tmp; - - assert(cm->show_existing_frame == 0); - memcpy(cpi->interp_filter_selected[LAST_FRAME], - cpi->interp_filter_selected[0], - sizeof(cpi->interp_filter_selected[0])); - - // If the new structure is used, we will always have overlay frames coupled - // with bwdref frames. Therefore, we won't have to perform this update - // in advance (we do this update when the overlay frame shows up). -#if USE_SYMM_MULTI_LAYER - if (cpi->new_bwdref_update_rule == 0 && cpi->rc.is_last_bipred_frame) { -#else - if (cpi->rc.is_last_bipred_frame) { -#endif - // Refresh the LAST_FRAME with the BWDREF_FRAME and retire the - // LAST3_FRAME by updating the virtual indices. - // - // NOTE: The source frame for BWDREF does not have a holding position as - // the OVERLAY frame for ALTREF's. Hence, to resolve the reference - // virtual index reshuffling for BWDREF, the encoder always - // specifies a LAST_BIPRED right before BWDREF and completes the - // reshuffling job accordingly. - tmp = cpi->ref_fb_idx[LAST_REF_FRAMES - 1]; - - shift_last_ref_frames(cpi); - cpi->ref_fb_idx[0] = cpi->ref_fb_idx[BWDREF_FRAME - 1]; - cpi->ref_fb_idx[BWDREF_FRAME - 1] = tmp; - - memcpy(cpi->interp_filter_selected[LAST_FRAME], - cpi->interp_filter_selected[BWDREF_FRAME], - sizeof(cpi->interp_filter_selected[BWDREF_FRAME])); - } - } - -#if DUMP_REF_FRAME_IMAGES == 1 - // Dump out all reference frame images. 
- dump_ref_frame_images(cpi); -#endif // DUMP_REF_FRAME_IMAGES -} - -static INLINE void alloc_frame_mvs(AV1_COMMON *const cm, int buffer_idx) { - assert(buffer_idx != INVALID_IDX); - RefCntBuffer *const new_fb_ptr = &cm->buffer_pool->frame_bufs[buffer_idx]; - ensure_mv_buffer(new_fb_ptr, cm); - new_fb_ptr->width = cm->width; - new_fb_ptr->height = cm->height; -} - -static void scale_references(AV1_COMP *cpi) { - AV1_COMMON *cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - MV_REFERENCE_FRAME ref_frame; - const AOM_REFFRAME ref_mask[INTER_REFS_PER_FRAME] = { - AOM_LAST_FLAG, AOM_LAST2_FLAG, AOM_LAST3_FLAG, AOM_GOLD_FLAG, - AOM_BWD_FLAG, AOM_ALT2_FLAG, AOM_ALT_FLAG - }; - - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { - // Need to convert from AOM_REFFRAME to index into ref_mask (subtract 1). - if (cpi->ref_frame_flags & ref_mask[ref_frame - 1]) { - BufferPool *const pool = cm->buffer_pool; - const YV12_BUFFER_CONFIG *const ref = - get_ref_frame_buffer(cpi, ref_frame); - - if (ref == NULL) { - cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX; - continue; - } - - if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) { - RefCntBuffer *new_fb_ptr = NULL; - int force_scaling = 0; - int new_fb = cpi->scaled_ref_idx[ref_frame - 1]; - if (new_fb == INVALID_IDX) { - new_fb = get_free_fb(cm); - force_scaling = 1; - } - if (new_fb == INVALID_IDX) return; - new_fb_ptr = &pool->frame_bufs[new_fb]; - if (force_scaling || new_fb_ptr->buf.y_crop_width != cm->width || - new_fb_ptr->buf.y_crop_height != cm->height) { - if (aom_realloc_frame_buffer( - &new_fb_ptr->buf, cm->width, cm->height, - cm->seq_params.subsampling_x, cm->seq_params.subsampling_y, - cm->seq_params.use_highbitdepth, AOM_BORDER_IN_PIXELS, - cm->byte_alignment, NULL, NULL, NULL)) - aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, - "Failed to allocate frame buffer"); - av1_resize_and_extend_frame( - ref, &new_fb_ptr->buf, (int)cm->seq_params.bit_depth, 
num_planes); - cpi->scaled_ref_idx[ref_frame - 1] = new_fb; - alloc_frame_mvs(cm, new_fb); - } - } else { - const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame); - RefCntBuffer *const buf = &pool->frame_bufs[buf_idx]; - buf->buf.y_crop_width = ref->y_crop_width; - buf->buf.y_crop_height = ref->y_crop_height; - cpi->scaled_ref_idx[ref_frame - 1] = buf_idx; - ++buf->ref_count; - } - } else { - if (cpi->oxcf.pass != 0) cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX; - } - } -} - -static void release_scaled_references(AV1_COMP *cpi) { - AV1_COMMON *cm = &cpi->common; - int i; - // TODO(isbs): only refresh the necessary frames, rather than all of them - for (i = 0; i < REF_FRAMES; ++i) { - const int idx = cpi->scaled_ref_idx[i]; - RefCntBuffer *const buf = - idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[idx] : NULL; - if (buf != NULL) { - --buf->ref_count; - cpi->scaled_ref_idx[i] = INVALID_IDX; - } - } -} - -static void set_mv_search_params(AV1_COMP *cpi) { - const AV1_COMMON *const cm = &cpi->common; - const unsigned int max_mv_def = AOMMIN(cm->width, cm->height); - - // Default based on max resolution. - cpi->mv_step_param = av1_init_search_range(max_mv_def); - - if (cpi->sf.mv.auto_mv_step_size) { - if (frame_is_intra_only(cm)) { - // Initialize max_mv_magnitude for use in the first INTER frame - // after a key/intra-only frame. - cpi->max_mv_magnitude = max_mv_def; - } else { - if (cm->show_frame) { - // Allow mv_steps to correspond to twice the max mv magnitude found - // in the previous frame, capped by the default max_mv_magnitude based - // on resolution. 
- cpi->mv_step_param = av1_init_search_range( - AOMMIN(max_mv_def, 2 * cpi->max_mv_magnitude)); - } - cpi->max_mv_magnitude = 0; - } - } -} - -static void set_size_independent_vars(AV1_COMP *cpi) { - int i; - for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) { - cpi->common.global_motion[i] = default_warp_params; - } - cpi->global_motion_search_done = 0; - av1_set_speed_features_framesize_independent(cpi); - av1_set_rd_speed_thresholds(cpi); - av1_set_rd_speed_thresholds_sub8x8(cpi); - cpi->common.interp_filter = SWITCHABLE; - cpi->common.switchable_motion_mode = 1; -} - -static void set_size_dependent_vars(AV1_COMP *cpi, int *q, int *bottom_index, - int *top_index) { - AV1_COMMON *const cm = &cpi->common; - const AV1EncoderConfig *const oxcf = &cpi->oxcf; - - // Setup variables that depend on the dimensions of the frame. - av1_set_speed_features_framesize_dependent(cpi); - - // Decide q and q bounds. - *q = av1_rc_pick_q_and_bounds(cpi, cm->width, cm->height, bottom_index, - top_index); - - if (!frame_is_intra_only(cm)) { - set_high_precision_mv(cpi, (*q) < HIGH_PRECISION_MV_QTHRESH, - cpi->common.cur_frame_force_integer_mv); - } - - // Configure experimental use of segmentation for enhanced coding of - // static regions if indicated. - // Only allowed in the second pass of a two pass encode, as it requires - // lagged coding, and if the relevant speed feature flag is set. 
- if (oxcf->pass == 2 && cpi->sf.static_segmentation) - configure_static_seg_features(cpi); -} - -static void init_motion_estimation(AV1_COMP *cpi) { - int y_stride = cpi->scaled_source.y_stride; - - if (cpi->sf.mv.search_method == NSTEP) { - av1_init3smotion_compensation(&cpi->ss_cfg, y_stride); - } else if (cpi->sf.mv.search_method == DIAMOND) { - av1_init_dsmotion_compensation(&cpi->ss_cfg, y_stride); - } -} - -#define COUPLED_CHROMA_FROM_LUMA_RESTORATION 0 -static void set_restoration_unit_size(int width, int height, int sx, int sy, - RestorationInfo *rst) { - (void)width; - (void)height; - (void)sx; - (void)sy; -#if COUPLED_CHROMA_FROM_LUMA_RESTORATION - int s = AOMMIN(sx, sy); -#else - int s = 0; -#endif // !COUPLED_CHROMA_FROM_LUMA_RESTORATION - - if (width * height > 352 * 288) - rst[0].restoration_unit_size = RESTORATION_UNITSIZE_MAX; - else - rst[0].restoration_unit_size = (RESTORATION_UNITSIZE_MAX >> 1); - rst[1].restoration_unit_size = rst[0].restoration_unit_size >> s; - rst[2].restoration_unit_size = rst[1].restoration_unit_size; -} - -static void init_ref_frame_bufs(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - int i; - BufferPool *const pool = cm->buffer_pool; - cm->new_fb_idx = INVALID_IDX; - for (i = 0; i < REF_FRAMES; ++i) { - cm->ref_frame_map[i] = INVALID_IDX; - pool->frame_bufs[i].ref_count = 0; - } - if (cm->seq_params.force_screen_content_tools) { - for (i = 0; i < FRAME_BUFFERS; ++i) { - av1_hash_table_init(&pool->frame_bufs[i].hash_table, &cpi->td.mb); - } - } -} - -static void check_initial_width(AV1_COMP *cpi, int use_highbitdepth, - int subsampling_x, int subsampling_y) { - AV1_COMMON *const cm = &cpi->common; - SequenceHeader *const seq_params = &cm->seq_params; - - if (!cpi->initial_width || seq_params->use_highbitdepth != use_highbitdepth || - seq_params->subsampling_x != subsampling_x || - seq_params->subsampling_y != subsampling_y) { - seq_params->subsampling_x = subsampling_x; - seq_params->subsampling_y = 
subsampling_y; - seq_params->use_highbitdepth = use_highbitdepth; - - alloc_raw_frame_buffers(cpi); - init_ref_frame_bufs(cpi); - alloc_util_frame_buffers(cpi); - - init_motion_estimation(cpi); // TODO(agrange) This can be removed. - - cpi->initial_width = cm->width; - cpi->initial_height = cm->height; - cpi->initial_mbs = cm->MBs; - } -} - -// Returns 1 if the assigned width or height was <= 0. -static int set_size_literal(AV1_COMP *cpi, int width, int height) { - AV1_COMMON *cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - check_initial_width(cpi, cm->seq_params.use_highbitdepth, - cm->seq_params.subsampling_x, - cm->seq_params.subsampling_y); - - if (width <= 0 || height <= 0) return 1; - - cm->width = width; - cm->height = height; - - if (cpi->initial_width && cpi->initial_height && - (cm->width > cpi->initial_width || cm->height > cpi->initial_height)) { - av1_free_context_buffers(cm); - av1_free_pc_tree(&cpi->td, num_planes); - alloc_compressor_data(cpi); - realloc_segmentation_maps(cpi); - cpi->initial_width = cpi->initial_height = 0; - } - update_frame_size(cpi); - - return 0; -} - -static void set_frame_size(AV1_COMP *cpi, int width, int height) { - AV1_COMMON *const cm = &cpi->common; - const SequenceHeader *const seq_params = &cm->seq_params; - const int num_planes = av1_num_planes(cm); - MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; - int ref_frame; - - if (width != cm->width || height != cm->height) { - // There has been a change in the encoded frame size - set_size_literal(cpi, width, height); - set_mv_search_params(cpi); - // Recalculate 'all_lossless' in case super-resolution was (un)selected. 
- cm->all_lossless = cm->coded_lossless && !av1_superres_scaled(cm); - } - - if (cpi->oxcf.pass == 2) { - av1_set_target_rate(cpi, cm->width, cm->height); - } - - alloc_frame_mvs(cm, cm->new_fb_idx); - - // Allocate above context buffers - if (cm->num_allocated_above_context_planes < av1_num_planes(cm) || - cm->num_allocated_above_context_mi_col < cm->mi_cols || - cm->num_allocated_above_contexts < cm->tile_rows) { - av1_free_above_context_buffers(cm, cm->num_allocated_above_contexts); - if (av1_alloc_above_context_buffers(cm, cm->tile_rows)) - aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, - "Failed to allocate context buffers"); - } - - // Reset the frame pointers to the current frame size. - if (aom_realloc_frame_buffer( - get_frame_new_buffer(cm), cm->width, cm->height, - seq_params->subsampling_x, seq_params->subsampling_y, - seq_params->use_highbitdepth, AOM_BORDER_IN_PIXELS, - cm->byte_alignment, NULL, NULL, NULL)) - aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, - "Failed to allocate frame buffer"); - - const int frame_width = cm->superres_upscaled_width; - const int frame_height = cm->superres_upscaled_height; - set_restoration_unit_size(frame_width, frame_height, - seq_params->subsampling_x, - seq_params->subsampling_y, cm->rst_info); - for (int i = 0; i < num_planes; ++i) - cm->rst_info[i].frame_restoration_type = RESTORE_NONE; - - av1_alloc_restoration_buffers(cm); - alloc_util_frame_buffers(cpi); // TODO(afergs): Remove? Gets called anyways. 
- init_motion_estimation(cpi); - - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { - RefBuffer *const ref_buf = &cm->frame_refs[ref_frame - LAST_FRAME]; - const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame); - - ref_buf->idx = buf_idx; - - if (buf_idx != INVALID_IDX) { - YV12_BUFFER_CONFIG *const buf = &cm->buffer_pool->frame_bufs[buf_idx].buf; - ref_buf->buf = buf; - av1_setup_scale_factors_for_frame(&ref_buf->sf, buf->y_crop_width, - buf->y_crop_height, cm->width, - cm->height); - if (av1_is_scaled(&ref_buf->sf)) - aom_extend_frame_borders(buf, num_planes); - } else { - ref_buf->buf = NULL; - } - } - - av1_setup_scale_factors_for_frame(&cm->sf_identity, cm->width, cm->height, - cm->width, cm->height); - - set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME); -} - -static uint8_t calculate_next_resize_scale(const AV1_COMP *cpi) { - // Choose an arbitrary random number - static unsigned int seed = 56789; - const AV1EncoderConfig *oxcf = &cpi->oxcf; - if (oxcf->pass == 1) return SCALE_NUMERATOR; - uint8_t new_denom = SCALE_NUMERATOR; - - if (cpi->common.seq_params.reduced_still_picture_hdr) return SCALE_NUMERATOR; - switch (oxcf->resize_mode) { - case RESIZE_NONE: new_denom = SCALE_NUMERATOR; break; - case RESIZE_FIXED: - if (cpi->common.frame_type == KEY_FRAME) - new_denom = oxcf->resize_kf_scale_denominator; - else - new_denom = oxcf->resize_scale_denominator; - break; - case RESIZE_RANDOM: new_denom = lcg_rand16(&seed) % 9 + 8; break; - default: assert(0); - } - return new_denom; -} - -static uint8_t calculate_next_superres_scale(AV1_COMP *cpi) { - // Choose an arbitrary random number - static unsigned int seed = 34567; - const AV1EncoderConfig *oxcf = &cpi->oxcf; - if (oxcf->pass == 1) return SCALE_NUMERATOR; - uint8_t new_denom = SCALE_NUMERATOR; - - // Make sure that superres mode of the frame is consistent with the - // sequence-level flag. 
- assert(IMPLIES(oxcf->superres_mode != SUPERRES_NONE, - cpi->common.seq_params.enable_superres)); - assert(IMPLIES(!cpi->common.seq_params.enable_superres, - oxcf->superres_mode == SUPERRES_NONE)); - - switch (oxcf->superres_mode) { - case SUPERRES_NONE: new_denom = SCALE_NUMERATOR; break; - case SUPERRES_FIXED: - if (cpi->common.frame_type == KEY_FRAME) - new_denom = oxcf->superres_kf_scale_denominator; - else - new_denom = oxcf->superres_scale_denominator; - break; - case SUPERRES_RANDOM: new_denom = lcg_rand16(&seed) % 9 + 8; break; - case SUPERRES_QTHRESH: { - const GF_GROUP *const gf_group = &cpi->twopass.gf_group; - const RATE_FACTOR_LEVEL rf_level = gf_group->rf_level[gf_group->index]; - const double rate_factor_delta = rate_factor_deltas[rf_level]; - const int qthresh = (rate_factor_delta <= 1.0) - ? oxcf->superres_qthresh - : oxcf->superres_kf_qthresh; - av1_set_target_rate(cpi, cpi->oxcf.width, cpi->oxcf.height); - int bottom_index, top_index; - const int q = av1_rc_pick_q_and_bounds( - cpi, cpi->oxcf.width, cpi->oxcf.height, &bottom_index, &top_index); - if (q < qthresh) { - new_denom = SCALE_NUMERATOR; - } else { - const uint8_t min_denom = SCALE_NUMERATOR + 1; - const uint8_t denom_step = (MAXQ - qthresh + 1) >> 3; - - if (q == qthresh) { - new_denom = min_denom; - } else if (denom_step == 0) { - new_denom = SCALE_NUMERATOR << 1; - } else { - const uint8_t additional_denom = (q - qthresh) / denom_step; - new_denom = - AOMMIN(min_denom + additional_denom, SCALE_NUMERATOR << 1); - } - } - break; - } - default: assert(0); - } - return new_denom; -} - -static int dimension_is_ok(int orig_dim, int resized_dim, int denom) { - return (resized_dim * SCALE_NUMERATOR >= orig_dim * denom / 2); -} - -static int dimensions_are_ok(int owidth, int oheight, size_params_type *rsz) { - // Only need to check the width, as scaling is horizontal only. 
- (void)oheight; - return dimension_is_ok(owidth, rsz->resize_width, rsz->superres_denom); -} - -static int validate_size_scales(RESIZE_MODE resize_mode, - SUPERRES_MODE superres_mode, int owidth, - int oheight, size_params_type *rsz) { - if (dimensions_are_ok(owidth, oheight, rsz)) { // Nothing to do. - return 1; - } - - // Calculate current resize scale. - int resize_denom = - AOMMAX(DIVIDE_AND_ROUND(owidth * SCALE_NUMERATOR, rsz->resize_width), - DIVIDE_AND_ROUND(oheight * SCALE_NUMERATOR, rsz->resize_height)); - - if (resize_mode != RESIZE_RANDOM && superres_mode == SUPERRES_RANDOM) { - // Alter superres scale as needed to enforce conformity. - rsz->superres_denom = - (2 * SCALE_NUMERATOR * SCALE_NUMERATOR) / resize_denom; - if (!dimensions_are_ok(owidth, oheight, rsz)) { - if (rsz->superres_denom > SCALE_NUMERATOR) --rsz->superres_denom; - } - } else if (resize_mode == RESIZE_RANDOM && superres_mode != SUPERRES_RANDOM) { - // Alter resize scale as needed to enforce conformity. - resize_denom = - (2 * SCALE_NUMERATOR * SCALE_NUMERATOR) / rsz->superres_denom; - rsz->resize_width = owidth; - rsz->resize_height = oheight; - av1_calculate_scaled_size(&rsz->resize_width, &rsz->resize_height, - resize_denom); - if (!dimensions_are_ok(owidth, oheight, rsz)) { - if (resize_denom > SCALE_NUMERATOR) { - --resize_denom; - rsz->resize_width = owidth; - rsz->resize_height = oheight; - av1_calculate_scaled_size(&rsz->resize_width, &rsz->resize_height, - resize_denom); - } - } - } else if (resize_mode == RESIZE_RANDOM && superres_mode == SUPERRES_RANDOM) { - // Alter both resize and superres scales as needed to enforce conformity. 
- do { - if (resize_denom > rsz->superres_denom) - --resize_denom; - else - --rsz->superres_denom; - rsz->resize_width = owidth; - rsz->resize_height = oheight; - av1_calculate_scaled_size(&rsz->resize_width, &rsz->resize_height, - resize_denom); - } while (!dimensions_are_ok(owidth, oheight, rsz) && - (resize_denom > SCALE_NUMERATOR || - rsz->superres_denom > SCALE_NUMERATOR)); - } else { // We are allowed to alter neither resize scale nor superres - // scale. - return 0; - } - return dimensions_are_ok(owidth, oheight, rsz); -} - -// Calculates resize and superres params for next frame -size_params_type av1_calculate_next_size_params(AV1_COMP *cpi) { - const AV1EncoderConfig *oxcf = &cpi->oxcf; - size_params_type rsz = { oxcf->width, oxcf->height, SCALE_NUMERATOR }; - int resize_denom; - if (oxcf->pass == 1) return rsz; - if (cpi->resize_pending_width && cpi->resize_pending_height) { - rsz.resize_width = cpi->resize_pending_width; - rsz.resize_height = cpi->resize_pending_height; - cpi->resize_pending_width = cpi->resize_pending_height = 0; - } else { - resize_denom = calculate_next_resize_scale(cpi); - rsz.resize_width = cpi->oxcf.width; - rsz.resize_height = cpi->oxcf.height; - av1_calculate_scaled_size(&rsz.resize_width, &rsz.resize_height, - resize_denom); - } - rsz.superres_denom = calculate_next_superres_scale(cpi); - if (!validate_size_scales(oxcf->resize_mode, oxcf->superres_mode, oxcf->width, - oxcf->height, &rsz)) - assert(0 && "Invalid scale parameters"); - return rsz; -} - -static void setup_frame_size_from_params(AV1_COMP *cpi, size_params_type *rsz) { - int encode_width = rsz->resize_width; - int encode_height = rsz->resize_height; - - AV1_COMMON *cm = &cpi->common; - cm->superres_upscaled_width = encode_width; - cm->superres_upscaled_height = encode_height; - cm->superres_scale_denominator = rsz->superres_denom; - av1_calculate_scaled_superres_size(&encode_width, &encode_height, - rsz->superres_denom); - set_frame_size(cpi, encode_width, 
encode_height); -} - -static void setup_frame_size(AV1_COMP *cpi) { - size_params_type rsz = av1_calculate_next_size_params(cpi); - setup_frame_size_from_params(cpi, &rsz); -} - -static void superres_post_encode(AV1_COMP *cpi) { - AV1_COMMON *cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - - if (!av1_superres_scaled(cm)) return; - - assert(cpi->oxcf.enable_superres); - assert(!is_lossless_requested(&cpi->oxcf)); - assert(!cm->all_lossless); - - av1_superres_upscale(cm, NULL); - - // If regular resizing is occurring the source will need to be downscaled to - // match the upscaled superres resolution. Otherwise the original source is - // used. - if (!av1_resize_scaled(cm)) { - cpi->source = cpi->unscaled_source; - if (cpi->last_source != NULL) cpi->last_source = cpi->unscaled_last_source; - } else { - assert(cpi->unscaled_source->y_crop_width != cm->superres_upscaled_width); - assert(cpi->unscaled_source->y_crop_height != cm->superres_upscaled_height); - // Do downscale. 
cm->(width|height) has been updated by - // av1_superres_upscale - if (aom_realloc_frame_buffer( - &cpi->scaled_source, cm->superres_upscaled_width, - cm->superres_upscaled_height, cm->seq_params.subsampling_x, - cm->seq_params.subsampling_y, cm->seq_params.use_highbitdepth, - AOM_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL)) - aom_internal_error( - &cm->error, AOM_CODEC_MEM_ERROR, - "Failed to reallocate scaled source buffer for superres"); - assert(cpi->scaled_source.y_crop_width == cm->superres_upscaled_width); - assert(cpi->scaled_source.y_crop_height == cm->superres_upscaled_height); - av1_resize_and_extend_frame(cpi->unscaled_source, &cpi->scaled_source, - (int)cm->seq_params.bit_depth, num_planes); - cpi->source = &cpi->scaled_source; - } -} - -static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) { - const int num_planes = av1_num_planes(cm); - MACROBLOCKD *xd = &cpi->td.mb.e_mbd; - - assert(IMPLIES(is_lossless_requested(&cpi->oxcf), - cm->coded_lossless && cm->all_lossless)); - - const int no_loopfilter = cm->coded_lossless || cm->large_scale_tile; - const int no_cdef = - !cm->seq_params.enable_cdef || cm->coded_lossless || cm->large_scale_tile; - const int no_restoration = !cm->seq_params.enable_restoration || - cm->all_lossless || cm->large_scale_tile; - - struct loopfilter *lf = &cm->lf; - - if (no_loopfilter) { - lf->filter_level[0] = 0; - lf->filter_level[1] = 0; - } else { - struct aom_usec_timer timer; - - aom_clear_system_state(); - - aom_usec_timer_start(&timer); - - av1_pick_filter_level(cpi->source, cpi, cpi->sf.lpf_pick); - - aom_usec_timer_mark(&timer); - cpi->time_pick_lpf += aom_usec_timer_elapsed(&timer); - } - - if (lf->filter_level[0] || lf->filter_level[1]) { -#if LOOP_FILTER_BITMASK - av1_loop_filter_frame(cm->frame_to_show, cm, xd, 0, 0, num_planes, 0); -#else - if (cpi->num_workers > 1) - av1_loop_filter_frame_mt(cm->frame_to_show, cm, xd, 0, num_planes, 0, - cpi->workers, cpi->num_workers, - &cpi->lf_row_sync); - 
else - av1_loop_filter_frame(cm->frame_to_show, cm, xd, 0, num_planes, 0); -#endif - } - - if (!no_restoration) - av1_loop_restoration_save_boundary_lines(cm->frame_to_show, cm, 0); - - if (no_cdef) { - cm->cdef_bits = 0; - cm->cdef_strengths[0] = 0; - cm->nb_cdef_strengths = 1; - cm->cdef_uv_strengths[0] = 0; - } else { - // Find CDEF parameters - av1_cdef_search(cm->frame_to_show, cpi->source, cm, xd, - cpi->sf.fast_cdef_search); - - // Apply the filter - av1_cdef_frame(cm->frame_to_show, cm, xd); - } - - superres_post_encode(cpi); - - if (no_restoration) { - cm->rst_info[0].frame_restoration_type = RESTORE_NONE; - cm->rst_info[1].frame_restoration_type = RESTORE_NONE; - cm->rst_info[2].frame_restoration_type = RESTORE_NONE; - } else { - av1_loop_restoration_save_boundary_lines(cm->frame_to_show, cm, 1); - av1_pick_filter_restoration(cpi->source, cpi); - if (cm->rst_info[0].frame_restoration_type != RESTORE_NONE || - cm->rst_info[1].frame_restoration_type != RESTORE_NONE || - cm->rst_info[2].frame_restoration_type != RESTORE_NONE) { - if (cpi->num_workers > 1) - av1_loop_restoration_filter_frame_mt(cm->frame_to_show, cm, 0, - cpi->workers, cpi->num_workers, - &cpi->lr_row_sync, &cpi->lr_ctxt); - else - av1_loop_restoration_filter_frame(cm->frame_to_show, cm, 0, - &cpi->lr_ctxt); - } - } -} - -static int encode_without_recode_loop(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - int q = 0, bottom_index = 0, top_index = 0; // Dummy variables. 
- - aom_clear_system_state(); - - set_size_independent_vars(cpi); - - setup_frame_size(cpi); - - assert(cm->width == cpi->scaled_source.y_crop_width); - assert(cm->height == cpi->scaled_source.y_crop_height); - - set_size_dependent_vars(cpi, &q, &bottom_index, &top_index); - - cpi->source = - av1_scale_if_required(cm, cpi->unscaled_source, &cpi->scaled_source); - if (cpi->unscaled_last_source != NULL) - cpi->last_source = av1_scale_if_required(cm, cpi->unscaled_last_source, - &cpi->scaled_last_source); - cpi->source->buf_8bit_valid = 0; - if (frame_is_intra_only(cm) == 0) { - scale_references(cpi); - } - - av1_set_quantizer(cm, q); - setup_frame(cpi); - suppress_active_map(cpi); - - // Variance adaptive and in frame q adjustment experiments are mutually - // exclusive. - if (cpi->oxcf.aq_mode == VARIANCE_AQ) { - av1_vaq_frame_setup(cpi); - } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { - av1_setup_in_frame_q_adj(cpi); - } else if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { - av1_cyclic_refresh_setup(cpi); - } - apply_active_map(cpi); - if (cm->seg.enabled) { - if (!cm->seg.update_data && cm->prev_frame) { - segfeatures_copy(&cm->seg, &cm->prev_frame->seg); - } else { - calculate_segdata(&cm->seg); - } - } else { - memset(&cm->seg, 0, sizeof(cm->seg)); - } - segfeatures_copy(&cm->cur_frame->seg, &cm->seg); - - // transform / motion compensation build reconstruction frame - av1_encode_frame(cpi); - - // Update some stats from cyclic refresh, and check if we should not update - // golden reference, for 1 pass CBR. - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->frame_type != KEY_FRAME && - (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == AOM_CBR)) - av1_cyclic_refresh_check_golden_update(cpi); - - // Update the skip mb flag probabilities based on the distribution - // seen in the last encoder iteration. 
- // update_base_skip_probs(cpi); - aom_clear_system_state(); - return AOM_CODEC_OK; -} - -static int encode_with_recode_loop(AV1_COMP *cpi, size_t *size, uint8_t *dest) { - AV1_COMMON *const cm = &cpi->common; - RATE_CONTROL *const rc = &cpi->rc; - int bottom_index, top_index; - int loop_count = 0; - int loop_at_this_size = 0; - int loop = 0; - int overshoot_seen = 0; - int undershoot_seen = 0; - int frame_over_shoot_limit; - int frame_under_shoot_limit; - int q = 0, q_low = 0, q_high = 0; - - set_size_independent_vars(cpi); - - cpi->source->buf_8bit_valid = 0; - - aom_clear_system_state(); - setup_frame_size(cpi); - set_size_dependent_vars(cpi, &q, &bottom_index, &top_index); - - do { - aom_clear_system_state(); - - if (loop_count == 0) { - // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed. - set_mv_search_params(cpi); - - // Reset the loop state for new frame size. - overshoot_seen = 0; - undershoot_seen = 0; - - q_low = bottom_index; - q_high = top_index; - - loop_at_this_size = 0; - - // Decide frame size bounds first time through. 
- av1_rc_compute_frame_size_bounds(cpi, rc->this_frame_target, - &frame_under_shoot_limit, - &frame_over_shoot_limit); - } - - // if frame was scaled calculate global_motion_search again if already - // done - if (loop_count > 0 && cpi->source && cpi->global_motion_search_done) - if (cpi->source->y_crop_width != cm->width || - cpi->source->y_crop_height != cm->height) - cpi->global_motion_search_done = 0; - cpi->source = - av1_scale_if_required(cm, cpi->unscaled_source, &cpi->scaled_source); - if (cpi->unscaled_last_source != NULL) - cpi->last_source = av1_scale_if_required(cm, cpi->unscaled_last_source, - &cpi->scaled_last_source); - - if (frame_is_intra_only(cm) == 0) { - if (loop_count > 0) { - release_scaled_references(cpi); - } - scale_references(cpi); - } - av1_set_quantizer(cm, q); - // printf("Frame %d/%d: q = %d, frame_type = %d\n", cm->current_video_frame, - // cm->show_frame, q, cm->frame_type); - - if (loop_count == 0) setup_frame(cpi); - - // Base q-index may have changed, so we need to assign proper default coef - // probs before every iteration. - if (cm->primary_ref_frame == PRIMARY_REF_NONE || - cm->frame_refs[cm->primary_ref_frame].idx < 0) { - av1_default_coef_probs(cm); - av1_setup_frame_contexts(cm); - } - - // Variance adaptive and in frame q adjustment experiments are mutually - // exclusive. 
- if (cpi->oxcf.aq_mode == VARIANCE_AQ) { - av1_vaq_frame_setup(cpi); - } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { - av1_setup_in_frame_q_adj(cpi); - } - if (cm->seg.enabled) { - if (!cm->seg.update_data && cm->prev_frame) { - segfeatures_copy(&cm->seg, &cm->prev_frame->seg); - } else { - calculate_segdata(&cm->seg); - } - } else { - memset(&cm->seg, 0, sizeof(cm->seg)); - } - segfeatures_copy(&cm->cur_frame->seg, &cm->seg); - - // transform / motion compensation build reconstruction frame - save_coding_context(cpi); - av1_encode_frame(cpi); - - // Update the skip mb flag probabilities based on the distribution - // seen in the last encoder iteration. - // update_base_skip_probs(cpi); - - aom_clear_system_state(); - - // Dummy pack of the bitstream using up to date stats to get an - // accurate estimate of output frame size to determine if we need - // to recode. - if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) { - restore_coding_context(cpi); - - if (av1_pack_bitstream(cpi, dest, size) != AOM_CODEC_OK) - return AOM_CODEC_ERROR; - - rc->projected_frame_size = (int)(*size) << 3; - restore_coding_context(cpi); - - if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1; - } - - if (cpi->oxcf.rc_mode == AOM_Q) { - loop = 0; - } else { - if ((cm->frame_type == KEY_FRAME) && rc->this_key_frame_forced && - (rc->projected_frame_size < rc->max_frame_bandwidth)) { - int last_q = q; - int64_t kf_err; - - int64_t high_err_target = cpi->ambient_err; - int64_t low_err_target = cpi->ambient_err >> 1; - - if (cm->seq_params.use_highbitdepth) { - kf_err = aom_highbd_get_y_sse(cpi->source, get_frame_new_buffer(cm)); - } else { - kf_err = aom_get_y_sse(cpi->source, get_frame_new_buffer(cm)); - } - // Prevent possible divide by zero error below for perfect KF - kf_err += !kf_err; - - // The key frame is not good enough or we can afford - // to make it better without undue risk of popping. 
- if ((kf_err > high_err_target && - rc->projected_frame_size <= frame_over_shoot_limit) || - (kf_err > low_err_target && - rc->projected_frame_size <= frame_under_shoot_limit)) { - // Lower q_high - q_high = q > q_low ? q - 1 : q_low; - - // Adjust Q - q = (int)((q * high_err_target) / kf_err); - q = AOMMIN(q, (q_high + q_low) >> 1); - } else if (kf_err < low_err_target && - rc->projected_frame_size >= frame_under_shoot_limit) { - // The key frame is much better than the previous frame - // Raise q_low - q_low = q < q_high ? q + 1 : q_high; - - // Adjust Q - q = (int)((q * low_err_target) / kf_err); - q = AOMMIN(q, (q_high + q_low + 1) >> 1); - } - - // Clamp Q to upper and lower limits: - q = clamp(q, q_low, q_high); - - loop = q != last_q; - } else if (recode_loop_test(cpi, frame_over_shoot_limit, - frame_under_shoot_limit, q, - AOMMAX(q_high, top_index), bottom_index)) { - // Is the projected frame size out of range and are we allowed - // to attempt to recode. - int last_q = q; - int retries = 0; - - // Frame size out of permitted range: - // Update correction factor & compute new Q to try... - // Frame is too large - if (rc->projected_frame_size > rc->this_frame_target) { - // Special case if the projected size is > the max allowed. - if (rc->projected_frame_size >= rc->max_frame_bandwidth) - q_high = rc->worst_quality; - - // Raise Qlow as to at least the current value - q_low = q < q_high ? 
q + 1 : q_high; - - if (undershoot_seen || loop_at_this_size > 1) { - // Update rate_correction_factor unless - av1_rc_update_rate_correction_factors(cpi, cm->width, cm->height); - - q = (q_high + q_low + 1) / 2; - } else { - // Update rate_correction_factor unless - av1_rc_update_rate_correction_factors(cpi, cm->width, cm->height); - - q = av1_rc_regulate_q(cpi, rc->this_frame_target, bottom_index, - AOMMAX(q_high, top_index), cm->width, - cm->height); - - while (q < q_low && retries < 10) { - av1_rc_update_rate_correction_factors(cpi, cm->width, cm->height); - q = av1_rc_regulate_q(cpi, rc->this_frame_target, bottom_index, - AOMMAX(q_high, top_index), cm->width, - cm->height); - retries++; - } - } - - overshoot_seen = 1; - } else { - // Frame is too small - q_high = q > q_low ? q - 1 : q_low; - - if (overshoot_seen || loop_at_this_size > 1) { - av1_rc_update_rate_correction_factors(cpi, cm->width, cm->height); - q = (q_high + q_low) / 2; - } else { - av1_rc_update_rate_correction_factors(cpi, cm->width, cm->height); - q = av1_rc_regulate_q(cpi, rc->this_frame_target, bottom_index, - top_index, cm->width, cm->height); - // Special case reset for qlow for constrained quality. - // This should only trigger where there is very substantial - // undershoot on a frame and the auto cq level is above - // the user passsed in value. - if (cpi->oxcf.rc_mode == AOM_CQ && q < q_low) { - q_low = q; - } - - while (q > q_high && retries < 10) { - av1_rc_update_rate_correction_factors(cpi, cm->width, cm->height); - q = av1_rc_regulate_q(cpi, rc->this_frame_target, bottom_index, - top_index, cm->width, cm->height); - retries++; - } - } - - undershoot_seen = 1; - } - - // Clamp Q to upper and lower limits: - q = clamp(q, q_low, q_high); - - loop = (q != last_q); - } else { - loop = 0; - } - } - - // Special case for overlay frame. 
- if (rc->is_src_frame_alt_ref && - rc->projected_frame_size < rc->max_frame_bandwidth) - loop = 0; - - if (!cpi->sf.gm_disable_recode) { - if (recode_loop_test_global_motion(cpi)) loop = 1; - } - - if (loop) { - ++loop_count; - ++loop_at_this_size; - -#if CONFIG_INTERNAL_STATS - ++cpi->tot_recode_hits; -#endif - } - } while (loop); - - return AOM_CODEC_OK; -} - -static int get_ref_frame_flags(const AV1_COMP *cpi) { - const int *const map = cpi->common.ref_frame_map; - - // No.1 Priority: LAST_FRAME - const int last2_is_last = map[cpi->ref_fb_idx[1]] == map[cpi->ref_fb_idx[0]]; - const int last3_is_last = map[cpi->ref_fb_idx[2]] == map[cpi->ref_fb_idx[0]]; - const int gld_is_last = - map[cpi->ref_fb_idx[GOLDEN_FRAME - 1]] == map[cpi->ref_fb_idx[0]]; - const int bwd_is_last = - map[cpi->ref_fb_idx[BWDREF_FRAME - 1]] == map[cpi->ref_fb_idx[0]]; - const int alt2_is_last = - map[cpi->ref_fb_idx[ALTREF2_FRAME - 1]] == map[cpi->ref_fb_idx[0]]; - const int alt_is_last = - map[cpi->ref_fb_idx[ALTREF_FRAME - 1]] == map[cpi->ref_fb_idx[0]]; - - // No.2 Priority: ALTREF_FRAME - const int last2_is_alt = - map[cpi->ref_fb_idx[1]] == map[cpi->ref_fb_idx[ALTREF_FRAME - 1]]; - const int last3_is_alt = - map[cpi->ref_fb_idx[2]] == map[cpi->ref_fb_idx[ALTREF_FRAME - 1]]; - const int gld_is_alt = map[cpi->ref_fb_idx[GOLDEN_FRAME - 1]] == - map[cpi->ref_fb_idx[ALTREF_FRAME - 1]]; - const int bwd_is_alt = map[cpi->ref_fb_idx[BWDREF_FRAME - 1]] == - map[cpi->ref_fb_idx[ALTREF_FRAME - 1]]; - const int alt2_is_alt = map[cpi->ref_fb_idx[ALTREF2_FRAME - 1]] == - map[cpi->ref_fb_idx[ALTREF_FRAME - 1]]; - - // No.3 Priority: LAST2_FRAME - const int last3_is_last2 = map[cpi->ref_fb_idx[2]] == map[cpi->ref_fb_idx[1]]; - const int gld_is_last2 = - map[cpi->ref_fb_idx[GOLDEN_FRAME - 1]] == map[cpi->ref_fb_idx[1]]; - const int bwd_is_last2 = - map[cpi->ref_fb_idx[BWDREF_FRAME - 1]] == map[cpi->ref_fb_idx[1]]; - const int alt2_is_last2 = - map[cpi->ref_fb_idx[ALTREF2_FRAME - 1]] == 
map[cpi->ref_fb_idx[1]]; - - // No.4 Priority: LAST3_FRAME - const int gld_is_last3 = - map[cpi->ref_fb_idx[GOLDEN_FRAME - 1]] == map[cpi->ref_fb_idx[2]]; - const int bwd_is_last3 = - map[cpi->ref_fb_idx[BWDREF_FRAME - 1]] == map[cpi->ref_fb_idx[2]]; - const int alt2_is_last3 = - map[cpi->ref_fb_idx[ALTREF2_FRAME - 1]] == map[cpi->ref_fb_idx[2]]; - - // No.5 Priority: GOLDEN_FRAME - const int bwd_is_gld = map[cpi->ref_fb_idx[BWDREF_FRAME - 1]] == - map[cpi->ref_fb_idx[GOLDEN_FRAME - 1]]; - const int alt2_is_gld = map[cpi->ref_fb_idx[ALTREF2_FRAME - 1]] == - map[cpi->ref_fb_idx[GOLDEN_FRAME - 1]]; - - // No.6 Priority: BWDREF_FRAME - const int alt2_is_bwd = map[cpi->ref_fb_idx[ALTREF2_FRAME - 1]] == - map[cpi->ref_fb_idx[BWDREF_FRAME - 1]]; - - // No.7 Priority: ALTREF2_FRAME - - // After av1_apply_encoding_flags() is called, cpi->ref_frame_flags might be - // adjusted according to external encoder flags. - int flags = cpi->ext_ref_frame_flags; - - if (cpi->rc.frames_till_gf_update_due == INT_MAX) flags &= ~AOM_GOLD_FLAG; - - if (alt_is_last) flags &= ~AOM_ALT_FLAG; - - if (last2_is_last || last2_is_alt) flags &= ~AOM_LAST2_FLAG; - - if (last3_is_last || last3_is_alt || last3_is_last2) flags &= ~AOM_LAST3_FLAG; - - if (gld_is_last || gld_is_alt || gld_is_last2 || gld_is_last3) - flags &= ~AOM_GOLD_FLAG; - - if ((bwd_is_last || bwd_is_alt || bwd_is_last2 || bwd_is_last3 || - bwd_is_gld) && - (flags & AOM_BWD_FLAG)) - flags &= ~AOM_BWD_FLAG; - - if ((alt2_is_last || alt2_is_alt || alt2_is_last2 || alt2_is_last3 || - alt2_is_gld || alt2_is_bwd) && - (flags & AOM_ALT2_FLAG)) - flags &= ~AOM_ALT2_FLAG; - - return flags; -} - -static void set_ext_overrides(AV1_COMP *cpi) { - // Overrides the defaults with the externally supplied values with - // av1_update_reference() and av1_update_entropy() calls - // Note: The overrides are valid only for the next frame passed - // to encode_frame_to_data_rate() function - if (cpi->ext_use_s_frame) cpi->common.frame_type = S_FRAME; - 
cpi->common.force_primary_ref_none = cpi->ext_use_primary_ref_none; - - if (cpi->ext_refresh_frame_context_pending) { - cpi->common.refresh_frame_context = cpi->ext_refresh_frame_context; - cpi->ext_refresh_frame_context_pending = 0; - } - if (cpi->ext_refresh_frame_flags_pending) { - cpi->refresh_last_frame = cpi->ext_refresh_last_frame; - cpi->refresh_golden_frame = cpi->ext_refresh_golden_frame; - cpi->refresh_alt_ref_frame = cpi->ext_refresh_alt_ref_frame; - cpi->refresh_bwd_ref_frame = cpi->ext_refresh_bwd_ref_frame; - cpi->refresh_alt2_ref_frame = cpi->ext_refresh_alt2_ref_frame; - cpi->ext_refresh_frame_flags_pending = 0; - } - cpi->common.allow_ref_frame_mvs = cpi->ext_use_ref_frame_mvs; - // A keyframe is already error resilient and keyframes with - // error_resilient_mode interferes with the use of show_existing_frame - // when forward reference keyframes are enabled. - cpi->common.error_resilient_mode = - cpi->ext_use_error_resilient && cpi->common.frame_type != KEY_FRAME; -} - -#define DUMP_RECON_FRAMES 0 - -#if DUMP_RECON_FRAMES == 1 -// NOTE(zoeliu): For debug - Output the filtered reconstructed video. -static void dump_filtered_recon_frames(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - const YV12_BUFFER_CONFIG *recon_buf = cm->frame_to_show; - - if (recon_buf == NULL) { - printf("Frame %d is not ready.\n", cm->current_video_frame); - return; - } - - static const int flag_list[REF_FRAMES] = { 0, - AOM_LAST_FLAG, - AOM_LAST2_FLAG, - AOM_LAST3_FLAG, - AOM_GOLD_FLAG, - AOM_BWD_FLAG, - AOM_ALT2_FLAG, - AOM_ALT_FLAG }; - printf( - "\n***Frame=%d (frame_offset=%d, show_frame=%d, " - "show_existing_frame=%d) " - "[LAST LAST2 LAST3 GOLDEN BWD ALT2 ALT]=[", - cm->current_video_frame, cm->frame_offset, cm->show_frame, - cm->show_existing_frame); - for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { - const int buf_idx = cm->frame_refs[ref_frame - LAST_FRAME].idx; - const int ref_offset = - (buf_idx >= 0) - ? 
(int)cm->buffer_pool->frame_bufs[buf_idx].cur_frame_offset - : -1; - printf( - " %d(%c-%d-%4.2f)", ref_offset, - (cpi->ref_frame_flags & flag_list[ref_frame]) ? 'Y' : 'N', - (buf_idx >= 0) ? (int)cpi->frame_rf_level[buf_idx] : -1, - (buf_idx >= 0) ? rate_factor_deltas[cpi->frame_rf_level[buf_idx]] : -1); - } - printf(" ]\n"); - - if (!cm->show_frame) { - printf("Frame %d is a no show frame, so no image dump.\n", - cm->current_video_frame); - return; - } - - int h; - char file_name[256] = "/tmp/enc_filtered_recon.yuv"; - FILE *f_recon = NULL; - - if (cm->current_video_frame == 0) { - if ((f_recon = fopen(file_name, "wb")) == NULL) { - printf("Unable to open file %s to write.\n", file_name); - return; - } - } else { - if ((f_recon = fopen(file_name, "ab")) == NULL) { - printf("Unable to open file %s to append.\n", file_name); - return; - } - } - printf( - "\nFrame=%5d, encode_update_type[%5d]=%1d, frame_offset=%d, " - "show_frame=%d, show_existing_frame=%d, source_alt_ref_active=%d, " - "refresh_alt_ref_frame=%d, rf_level=%d, " - "y_stride=%4d, uv_stride=%4d, cm->width=%4d, cm->height=%4d\n\n", - cm->current_video_frame, cpi->twopass.gf_group.index, - cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index], - cm->frame_offset, cm->show_frame, cm->show_existing_frame, - cpi->rc.source_alt_ref_active, cpi->refresh_alt_ref_frame, - cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index], - recon_buf->y_stride, recon_buf->uv_stride, cm->width, cm->height); -#if 0 - int ref_frame; - printf("get_ref_frame_map_idx: ["); - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) - printf(" %d", get_ref_frame_map_idx(cpi, ref_frame)); - printf(" ]\n"); - printf("cm->new_fb_idx = %d\n", cm->new_fb_idx); - printf("cm->ref_frame_map = ["); - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { - printf(" %d", cm->ref_frame_map[ref_frame - LAST_FRAME]); - } - printf(" ]\n"); -#endif // 0 - - // --- Y --- - for (h = 0; h < cm->height; ++h) 
{ - fwrite(&recon_buf->y_buffer[h * recon_buf->y_stride], 1, cm->width, - f_recon); - } - // --- U --- - for (h = 0; h < (cm->height >> 1); ++h) { - fwrite(&recon_buf->u_buffer[h * recon_buf->uv_stride], 1, (cm->width >> 1), - f_recon); - } - // --- V --- - for (h = 0; h < (cm->height >> 1); ++h) { - fwrite(&recon_buf->v_buffer[h * recon_buf->uv_stride], 1, (cm->width >> 1), - f_recon); - } - - fclose(f_recon); -} -#endif // DUMP_RECON_FRAMES - -static INLINE int is_frame_droppable(AV1_COMP *cpi) { - return !(cpi->refresh_alt_ref_frame || cpi->refresh_alt2_ref_frame || - cpi->refresh_bwd_ref_frame || cpi->refresh_golden_frame || - cpi->refresh_last_frame); -} - -static int encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size, uint8_t *dest, - int skip_adapt, - unsigned int *frame_flags) { - AV1_COMMON *const cm = &cpi->common; - SequenceHeader *const seq_params = &cm->seq_params; - const AV1EncoderConfig *const oxcf = &cpi->oxcf; - struct segmentation *const seg = &cm->seg; - - set_ext_overrides(cpi); - aom_clear_system_state(); - - // frame type has been decided outside of this function call - cm->cur_frame->intra_only = frame_is_intra_only(cm); - cm->cur_frame->frame_type = cm->frame_type; - - // S_FRAMEs are always error resilient - cm->error_resilient_mode |= frame_is_sframe(cm); - - cm->large_scale_tile = cpi->oxcf.large_scale_tile; - cm->single_tile_decoding = cpi->oxcf.single_tile_decoding; - if (cm->large_scale_tile) seq_params->frame_id_numbers_present_flag = 0; - - cm->allow_ref_frame_mvs &= frame_might_allow_ref_frame_mvs(cm); - // cm->allow_ref_frame_mvs needs to be written into the frame header while - // cm->large_scale_tile is 1, therefore, "cm->large_scale_tile=1" case is - // separated from frame_might_allow_ref_frame_mvs(). - cm->allow_ref_frame_mvs &= !cm->large_scale_tile; - - cm->allow_warped_motion = - cpi->oxcf.allow_warped_motion && frame_might_allow_warped_motion(cm); - - // Reset the frame packet stamp index. 
- if (cm->frame_type == KEY_FRAME && cm->show_frame) - cm->current_video_frame = 0; - - // NOTE: - // (1) Move the setup of the ref_frame_flags upfront as it would be - // determined by the current frame properties; - // (2) The setup of the ref_frame_flags applies to both - // show_existing_frame's - // and the other cases. - if (cm->current_video_frame > 0) - cpi->ref_frame_flags = get_ref_frame_flags(cpi); - - if (encode_show_existing_frame(cm)) { - // NOTE(zoeliu): In BIDIR_PRED, the existing frame to show is the current - // BWDREF_FRAME in the reference frame buffer. - if (cm->frame_type == KEY_FRAME) { - cm->reset_decoder_state = 1; - } else { - cm->frame_type = INTER_FRAME; - } - cm->show_frame = 1; - cpi->frame_flags = *frame_flags; - - restore_coding_context(cpi); - - // Build the bitstream - if (av1_pack_bitstream(cpi, dest, size) != AOM_CODEC_OK) - return AOM_CODEC_ERROR; - - cpi->seq_params_locked = 1; - - // Set up frame to show to get ready for stats collection. - cm->frame_to_show = get_frame_new_buffer(cm); - - // Update current frame offset. - cm->frame_offset = - cm->buffer_pool->frame_bufs[cm->new_fb_idx].cur_frame_offset; - -#if DUMP_RECON_FRAMES == 1 - // NOTE(zoeliu): For debug - Output the filtered reconstructed video. - dump_filtered_recon_frames(cpi); -#endif // DUMP_RECON_FRAMES - - // Update the LAST_FRAME in the reference frame buffer. - // NOTE: - // (1) For BWDREF_FRAME as the show_existing_frame, the reference frame - // update has been done previously when handling the LAST_BIPRED_FRAME - // right before BWDREF_FRAME (in the display order); - // (2) For INTNL_OVERLAY as the show_existing_frame, the reference frame - // update will be done when the following is called, which will - // exchange - // the virtual indexes between LAST_FRAME and ALTREF2_FRAME, so that - // LAST3 will get retired, LAST2 becomes LAST3, LAST becomes LAST2, - // and - // ALTREF2_FRAME will serve as the new LAST_FRAME. 
- update_reference_frames(cpi); - - // Update frame flags - cpi->frame_flags &= ~FRAMEFLAGS_GOLDEN; - cpi->frame_flags &= ~FRAMEFLAGS_BWDREF; - cpi->frame_flags &= ~FRAMEFLAGS_ALTREF; - - *frame_flags = cpi->frame_flags & ~FRAMEFLAGS_KEY; - - // Update the frame type - cm->last_frame_type = cm->frame_type; - - // Since we allocate a spot for the OVERLAY frame in the gf group, we need - // to do post-encoding update accordingly. - if (cpi->rc.is_src_frame_alt_ref) { - av1_set_target_rate(cpi, cm->width, cm->height); - av1_rc_postencode_update(cpi, *size); - } - - ++cm->current_video_frame; - - return AOM_CODEC_OK; - } - - // Set default state for segment based loop filter update flags. - cm->lf.mode_ref_delta_update = 0; - - // Set various flags etc to special state if it is a key frame. - if (frame_is_intra_only(cm) || frame_is_sframe(cm)) { - // Reset the loop filter deltas and segmentation map. - av1_reset_segment_features(cm); - - // If segmentation is enabled force a map update for key frames. - if (seg->enabled) { - seg->update_map = 1; - seg->update_data = 1; - } - - // The alternate reference frame cannot be active for a key frame. - cpi->rc.source_alt_ref_active = 0; - } - if (cpi->oxcf.mtu == 0) { - cm->num_tg = cpi->oxcf.num_tile_groups; - } else { - // Use a default value for the purposes of weighting costs in probability - // updates - cm->num_tg = DEFAULT_MAX_NUM_TG; - } - - // For 1 pass CBR, check if we are dropping this frame. - // Never drop on key frame. 
- if (oxcf->pass == 0 && oxcf->rc_mode == AOM_CBR && - cm->frame_type != KEY_FRAME) { - if (av1_rc_drop_frame(cpi)) { - av1_rc_postencode_update_drop_frame(cpi); - return AOM_CODEC_OK; - } - } - - aom_clear_system_state(); - -#if CONFIG_INTERNAL_STATS - memset(cpi->mode_chosen_counts, 0, - MAX_MODES * sizeof(*cpi->mode_chosen_counts)); -#endif - - if (seq_params->frame_id_numbers_present_flag) { - /* Non-normative definition of current_frame_id ("frame counter" with - * wraparound) */ - const int frame_id_length = FRAME_ID_LENGTH; - if (cm->current_frame_id == -1) { - int lsb, msb; - /* quasi-random initialization of current_frame_id for a key frame */ - if (cpi->source->flags & YV12_FLAG_HIGHBITDEPTH) { - lsb = CONVERT_TO_SHORTPTR(cpi->source->y_buffer)[0] & 0xff; - msb = CONVERT_TO_SHORTPTR(cpi->source->y_buffer)[1] & 0xff; - } else { - lsb = cpi->source->y_buffer[0] & 0xff; - msb = cpi->source->y_buffer[1] & 0xff; - } - cm->current_frame_id = ((msb << 8) + lsb) % (1 << frame_id_length); - - // S_frame is meant for stitching different streams of different - // resolutions together, so current_frame_id must be the - // same across different streams of the same content current_frame_id - // should be the same and not random. 0x37 is a chosen number as start - // point - if (cpi->oxcf.sframe_enabled) cm->current_frame_id = 0x37; - } else { - cm->current_frame_id = - (cm->current_frame_id + 1 + (1 << frame_id_length)) % - (1 << frame_id_length); - } - } - - switch (cpi->oxcf.cdf_update_mode) { - case 0: // No CDF update for any frames(4~6% compression loss). - cm->disable_cdf_update = 1; - break; - case 1: // Enable CDF update for all frames. - cm->disable_cdf_update = 0; - break; - case 2: - // Strategically determine at which frames to do CDF update. - // Currently only enable CDF update for all-intra and no-show frames(1.5% - // compression loss). 
- // TODO(huisu@google.com): design schemes for various trade-offs between - // compression quality and decoding speed. - cm->disable_cdf_update = - (frame_is_intra_only(cm) || !cm->show_frame) ? 0 : 1; - break; - } - cm->timing_info_present &= !seq_params->reduced_still_picture_hdr; - - if (cpi->sf.recode_loop == DISALLOW_RECODE) { - if (encode_without_recode_loop(cpi) != AOM_CODEC_OK) return AOM_CODEC_ERROR; - } else { - if (encode_with_recode_loop(cpi, size, dest) != AOM_CODEC_OK) - return AOM_CODEC_ERROR; - } - - cm->last_tile_cols = cm->tile_cols; - cm->last_tile_rows = cm->tile_rows; - -#ifdef OUTPUT_YUV_SKINMAP - if (cpi->common.current_video_frame > 1) { - av1_compute_skin_map(cpi, yuv_skinmap_file); - } -#endif // OUTPUT_YUV_SKINMAP - - // Special case code to reduce pulsing when key frames are forced at a - // fixed interval. Note the reconstruction error if it is the frame before - // the force key frame - if (cpi->rc.next_key_frame_forced && cpi->rc.frames_to_key == 1) { - if (seq_params->use_highbitdepth) { - cpi->ambient_err = - aom_highbd_get_y_sse(cpi->source, get_frame_new_buffer(cm)); - } else { - cpi->ambient_err = aom_get_y_sse(cpi->source, get_frame_new_buffer(cm)); - } - } - - // If the encoder forced a KEY_FRAME decision or if frame is an S_FRAME - if ((cm->frame_type == KEY_FRAME && cm->show_frame) || frame_is_sframe(cm)) { - cpi->refresh_last_frame = 1; - } - - cm->frame_to_show = get_frame_new_buffer(cm); - cm->frame_to_show->color_primaries = seq_params->color_primaries; - cm->frame_to_show->transfer_characteristics = - seq_params->transfer_characteristics; - cm->frame_to_show->matrix_coefficients = seq_params->matrix_coefficients; - cm->frame_to_show->monochrome = seq_params->monochrome; - cm->frame_to_show->chroma_sample_position = - seq_params->chroma_sample_position; - cm->frame_to_show->color_range = seq_params->color_range; - cm->frame_to_show->render_width = cm->render_width; - cm->frame_to_show->render_height = cm->render_height; 
- - // TODO(zoeliu): For non-ref frames, loop filtering may need to be turned - // off. - - // Pick the loop filter level for the frame. - if (!cm->allow_intrabc) { - loopfilter_frame(cpi, cm); - } else { - cm->lf.filter_level[0] = 0; - cm->lf.filter_level[1] = 0; - cm->cdef_bits = 0; - cm->cdef_strengths[0] = 0; - cm->nb_cdef_strengths = 1; - cm->cdef_uv_strengths[0] = 0; - cm->rst_info[0].frame_restoration_type = RESTORE_NONE; - cm->rst_info[1].frame_restoration_type = RESTORE_NONE; - cm->rst_info[2].frame_restoration_type = RESTORE_NONE; - } - - // TODO(debargha): Fix mv search range on encoder side - // aom_extend_frame_inner_borders(cm->frame_to_show, av1_num_planes(cm)); - aom_extend_frame_borders(cm->frame_to_show, av1_num_planes(cm)); - -#ifdef OUTPUT_YUV_REC - aom_write_one_yuv_frame(cm, cm->frame_to_show); -#endif - - // Build the bitstream - if (av1_pack_bitstream(cpi, dest, size) != AOM_CODEC_OK) - return AOM_CODEC_ERROR; - - cpi->seq_params_locked = 1; - - if (skip_adapt) return AOM_CODEC_OK; - - if (seq_params->frame_id_numbers_present_flag) { - int i; - // Update reference frame id values based on the value of refresh_frame_mask - for (i = 0; i < REF_FRAMES; i++) { - if ((cpi->refresh_frame_mask >> i) & 1) { - cm->ref_frame_id[i] = cm->current_frame_id; - } - } - } - -#if DUMP_RECON_FRAMES == 1 - // NOTE(zoeliu): For debug - Output the filtered reconstructed video. 
- dump_filtered_recon_frames(cpi); -#endif // DUMP_RECON_FRAMES - - if (cm->seg.enabled) { - if (cm->seg.update_map) { - update_reference_segmentation_map(cpi); - } else if (cm->last_frame_seg_map) { - memcpy(cm->current_frame_seg_map, cm->last_frame_seg_map, - cm->mi_cols * cm->mi_rows * sizeof(uint8_t)); - } - } - - if (frame_is_intra_only(cm) == 0) { - release_scaled_references(cpi); - } - - update_reference_frames(cpi); - -#if CONFIG_ENTROPY_STATS - av1_accumulate_frame_counts(&aggregate_fc, &cpi->counts); -#endif // CONFIG_ENTROPY_STATS - - if (cm->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) { - *cm->fc = cpi->tile_data[cm->largest_tile_id].tctx; - av1_reset_cdf_symbol_counters(cm->fc); - } - - if (cpi->refresh_golden_frame == 1) - cpi->frame_flags |= FRAMEFLAGS_GOLDEN; - else - cpi->frame_flags &= ~FRAMEFLAGS_GOLDEN; - - if (cpi->refresh_alt_ref_frame == 1) - cpi->frame_flags |= FRAMEFLAGS_ALTREF; - else - cpi->frame_flags &= ~FRAMEFLAGS_ALTREF; - - if (cpi->refresh_bwd_ref_frame == 1) - cpi->frame_flags |= FRAMEFLAGS_BWDREF; - else - cpi->frame_flags &= ~FRAMEFLAGS_BWDREF; - - cm->last_frame_type = cm->frame_type; - - av1_rc_postencode_update(cpi, *size); - - if (cm->frame_type == KEY_FRAME) { - // Tell the caller that the frame was coded as a key frame - *frame_flags = cpi->frame_flags | FRAMEFLAGS_KEY; - } else { - *frame_flags = cpi->frame_flags & ~FRAMEFLAGS_KEY; - } - - // Clear the one shot update flags for segmentation map and mode/ref loop - // filter deltas. - cm->seg.update_map = 0; - cm->seg.update_data = 0; - cm->lf.mode_ref_delta_update = 0; - - // A droppable frame might not be shown but it always - // takes a space in the gf group. Therefore, even when - // it is not shown, we still need update the count down. - - if (cm->show_frame) { - // TODO(zoeliu): We may only swamp mi and prev_mi for those frames that - // are - // being used as reference. 
- swap_mi_and_prev_mi(cm); - // Don't increment frame counters if this was an altref buffer - // update not a real frame - - ++cm->current_video_frame; - } - - // NOTE: Shall not refer to any frame not used as reference. - if (cm->is_reference_frame) { - // keep track of the last coded dimensions - cm->last_width = cm->width; - cm->last_height = cm->height; - - // reset to normal state now that we are done. - cm->last_show_frame = cm->show_frame; - } - - return AOM_CODEC_OK; -} - -static INLINE void update_keyframe_counters(AV1_COMP *cpi) { - // TODO(zoeliu): To investigate whether we should treat BWDREF_FRAME - // differently here for rc->avg_frame_bandwidth. - if (cpi->common.show_frame || cpi->rc.is_bwd_ref_frame) { - if (!cpi->common.show_existing_frame || cpi->rc.is_src_frame_alt_ref || - cpi->common.frame_type == KEY_FRAME) { - // If this is a show_existing_frame with a source other than altref, - // or if it is not a displayed forward keyframe, the keyframe update - // counters were incremented when it was originally encoded. - cpi->rc.frames_since_key++; - cpi->rc.frames_to_key--; - } - } -} - -static INLINE void update_frames_till_gf_update(AV1_COMP *cpi) { - // TODO(weitinglin): Updating this counter for is_frame_droppable - // is a work-around to handle the condition when a frame is drop. - // We should fix the cpi->common.show_frame flag - // instead of checking the other condition to update the counter properly. - if (cpi->common.show_frame || is_frame_droppable(cpi)) { - // Decrement count down till next gf - if (cpi->rc.frames_till_gf_update_due > 0) - cpi->rc.frames_till_gf_update_due--; - } -} - -static INLINE void update_twopass_gf_group_index(AV1_COMP *cpi) { - // Increment the gf group index ready for the next frame. If this is - // a show_existing_frame with a source other than altref, or if it is not - // a displayed forward keyframe, the index was incremented when it was - // originally encoded. 
- if (!cpi->common.show_existing_frame || cpi->rc.is_src_frame_alt_ref || - cpi->common.frame_type == KEY_FRAME) { - ++cpi->twopass.gf_group.index; - } -} - -static void update_rc_counts(AV1_COMP *cpi) { - update_keyframe_counters(cpi); - update_frames_till_gf_update(cpi); - if (cpi->oxcf.pass == 2) update_twopass_gf_group_index(cpi); -} - -static int Pass0Encode(AV1_COMP *cpi, size_t *size, uint8_t *dest, - int skip_adapt, unsigned int *frame_flags) { - if (cpi->oxcf.rc_mode == AOM_CBR) { - av1_rc_get_one_pass_cbr_params(cpi); - } else { - av1_rc_get_one_pass_vbr_params(cpi); - } - if (encode_frame_to_data_rate(cpi, size, dest, skip_adapt, frame_flags) != - AOM_CODEC_OK) { - return AOM_CODEC_ERROR; - } - update_rc_counts(cpi); - check_show_existing_frame(cpi); - return AOM_CODEC_OK; -} - -static int Pass2Encode(AV1_COMP *cpi, size_t *size, uint8_t *dest, - unsigned int *frame_flags) { -#if CONFIG_MISMATCH_DEBUG - mismatch_move_frame_idx_w(); -#endif -#if TXCOEFF_COST_TIMER - AV1_COMMON *cm = &cpi->common; - cm->txcoeff_cost_timer = 0; - cm->txcoeff_cost_count = 0; -#endif - - if (encode_frame_to_data_rate(cpi, size, dest, 0, frame_flags) != - AOM_CODEC_OK) { - return AOM_CODEC_ERROR; - } - -#if TXCOEFF_COST_TIMER - cm->cum_txcoeff_cost_timer += cm->txcoeff_cost_timer; - fprintf(stderr, - "\ntxb coeff cost block number: %ld, frame time: %ld, cum time %ld " - "in us\n", - cm->txcoeff_cost_count, cm->txcoeff_cost_timer, - cm->cum_txcoeff_cost_timer); -#endif - - av1_twopass_postencode_update(cpi); - update_rc_counts(cpi); - check_show_existing_frame(cpi); - return AOM_CODEC_OK; -} - -#if CONFIG_DENOISE -static int apply_denoise_2d(AV1_COMP *cpi, YV12_BUFFER_CONFIG *sd, - int block_size, float noise_level, - int64_t time_stamp, int64_t end_time) { - AV1_COMMON *const cm = &cpi->common; - if (!cpi->denoise_and_model) { - cpi->denoise_and_model = aom_denoise_and_model_alloc( - cm->seq_params.bit_depth, block_size, noise_level); - if (!cpi->denoise_and_model) { - 
aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, - "Error allocating denoise and model"); - return -1; - } - } - if (!cpi->film_grain_table) { - cpi->film_grain_table = aom_malloc(sizeof(*cpi->film_grain_table)); - if (!cpi->film_grain_table) { - aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, - "Error allocating grain table"); - return -1; - } - memset(cpi->film_grain_table, 0, sizeof(*cpi->film_grain_table)); - } - if (aom_denoise_and_model_run(cpi->denoise_and_model, sd, - &cm->film_grain_params)) { - if (cm->film_grain_params.apply_grain) { - aom_film_grain_table_append(cpi->film_grain_table, time_stamp, end_time, - &cm->film_grain_params); - } - } - return 0; -} -#endif - -int av1_receive_raw_frame(AV1_COMP *cpi, aom_enc_frame_flags_t frame_flags, - YV12_BUFFER_CONFIG *sd, int64_t time_stamp, - int64_t end_time) { - AV1_COMMON *const cm = &cpi->common; - const SequenceHeader *const seq_params = &cm->seq_params; - struct aom_usec_timer timer; - int res = 0; - const int subsampling_x = sd->subsampling_x; - const int subsampling_y = sd->subsampling_y; - const int use_highbitdepth = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0; - - check_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y); - - aom_usec_timer_start(&timer); - -#if CONFIG_DENOISE - if (cpi->oxcf.noise_level > 0) - if (apply_denoise_2d(cpi, sd, cpi->oxcf.noise_block_size, - cpi->oxcf.noise_level, time_stamp, end_time) < 0) - res = -1; -#endif // CONFIG_DENOISE - - if (av1_lookahead_push(cpi->lookahead, sd, time_stamp, end_time, - use_highbitdepth, frame_flags)) - res = -1; - aom_usec_timer_mark(&timer); - cpi->time_receive_data += aom_usec_timer_elapsed(&timer); - - if ((seq_params->profile == PROFILE_0) && !seq_params->monochrome && - (subsampling_x != 1 || subsampling_y != 1)) { - aom_internal_error(&cm->error, AOM_CODEC_INVALID_PARAM, - "Non-4:2:0 color format requires profile 1 or 2"); - res = -1; - } - if ((seq_params->profile == PROFILE_1) && - !(subsampling_x == 0 && 
subsampling_y == 0)) { - aom_internal_error(&cm->error, AOM_CODEC_INVALID_PARAM, - "Profile 1 requires 4:4:4 color format"); - res = -1; - } - if ((seq_params->profile == PROFILE_2) && - (seq_params->bit_depth <= AOM_BITS_10) && - !(subsampling_x == 1 && subsampling_y == 0)) { - aom_internal_error(&cm->error, AOM_CODEC_INVALID_PARAM, - "Profile 2 bit-depth < 10 requires 4:2:2 color format"); - res = -1; - } - - return res; -} - -static int frame_is_reference(const AV1_COMP *cpi) { - const AV1_COMMON *cm = &cpi->common; - - return cm->frame_type == KEY_FRAME || cpi->refresh_last_frame || - cpi->refresh_golden_frame || cpi->refresh_bwd_ref_frame || - cpi->refresh_alt2_ref_frame || cpi->refresh_alt_ref_frame || - !cm->error_resilient_mode || cm->lf.mode_ref_delta_update || - cm->seg.update_map || cm->seg.update_data; -} - -static void adjust_frame_rate(AV1_COMP *cpi, - const struct lookahead_entry *source) { - int64_t this_duration; - int step = 0; - - if (source->ts_start == cpi->first_time_stamp_ever) { - this_duration = source->ts_end - source->ts_start; - step = 1; - } else { - int64_t last_duration = - cpi->last_end_time_stamp_seen - cpi->last_time_stamp_seen; - - this_duration = source->ts_end - cpi->last_end_time_stamp_seen; - - // do a step update if the duration changes by 10% - if (last_duration) - step = (int)((this_duration - last_duration) * 10 / last_duration); - } - - if (this_duration) { - if (step) { - av1_new_framerate(cpi, 10000000.0 / this_duration); - } else { - // Average this frame's rate into the last second's average - // frame rate. If we haven't seen 1 second yet, then average - // over the whole interval seen. 
- const double interval = AOMMIN( - (double)(source->ts_end - cpi->first_time_stamp_ever), 10000000.0); - double avg_duration = 10000000.0 / cpi->framerate; - avg_duration *= (interval - avg_duration + this_duration); - avg_duration /= interval; - - av1_new_framerate(cpi, 10000000.0 / avg_duration); - } - } - cpi->last_time_stamp_seen = source->ts_start; - cpi->last_end_time_stamp_seen = source->ts_end; -} - -// Returns 0 if this is not an alt ref else the offset of the source frame -// used as the arf midpoint. -static int get_arf_src_index(AV1_COMP *cpi) { - RATE_CONTROL *const rc = &cpi->rc; - int arf_src_index = 0; - if (is_altref_enabled(cpi)) { - if (cpi->oxcf.pass == 2) { - const GF_GROUP *const gf_group = &cpi->twopass.gf_group; - if (gf_group->update_type[gf_group->index] == ARF_UPDATE) { - arf_src_index = gf_group->arf_src_offset[gf_group->index]; - } - } else if (rc->source_alt_ref_pending) { - arf_src_index = rc->frames_till_gf_update_due; - } - } - return arf_src_index; -} - -static int get_brf_src_index(AV1_COMP *cpi) { - int brf_src_index = 0; - const GF_GROUP *const gf_group = &cpi->twopass.gf_group; - - // TODO(zoeliu): We need to add the check on the -bwd_ref command line setup - // flag. - if (gf_group->bidir_pred_enabled[gf_group->index]) { - if (cpi->oxcf.pass == 2) { - if (gf_group->update_type[gf_group->index] == BRF_UPDATE) - brf_src_index = gf_group->brf_src_offset[gf_group->index]; - } else { - // TODO(zoeliu): To re-visit the setup for this scenario - brf_src_index = cpi->rc.bipred_group_interval - 1; - } - } - - return brf_src_index; -} - -// Returns 0 if this is not an alt ref else the offset of the source frame -// used as the arf midpoint. 
-static int get_arf2_src_index(AV1_COMP *cpi) { - int arf2_src_index = 0; - if (is_altref_enabled(cpi) && cpi->num_extra_arfs) { - if (cpi->oxcf.pass == 2) { - const GF_GROUP *const gf_group = &cpi->twopass.gf_group; - if (gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE) { - arf2_src_index = gf_group->arf_src_offset[gf_group->index]; - } - } - } - return arf2_src_index; -} - -static void check_src_altref(AV1_COMP *cpi, - const struct lookahead_entry *source) { - RATE_CONTROL *const rc = &cpi->rc; - - // If pass == 2, the parameters set here will be reset in - // av1_rc_get_second_pass_params() - - if (cpi->oxcf.pass == 2) { - const GF_GROUP *const gf_group = &cpi->twopass.gf_group; - rc->is_src_frame_alt_ref = - (gf_group->update_type[gf_group->index] == INTNL_OVERLAY_UPDATE) || - (gf_group->update_type[gf_group->index] == OVERLAY_UPDATE); - rc->is_src_frame_ext_arf = - gf_group->update_type[gf_group->index] == INTNL_OVERLAY_UPDATE; - } else { - rc->is_src_frame_alt_ref = - cpi->alt_ref_source && (source == cpi->alt_ref_source); - } - - if (rc->is_src_frame_alt_ref) { - // Current frame is an ARF overlay frame. - cpi->alt_ref_source = NULL; - - if (rc->is_src_frame_ext_arf && !cpi->common.show_existing_frame) { - // For INTNL_OVERLAY, when show_existing_frame == 0, they do need to - // refresh the LAST_FRAME, i.e. LAST3 gets retired, LAST2 becomes LAST3, - // LAST becomes LAST2, and INTNL_OVERLAY becomes LAST. - cpi->refresh_last_frame = 1; - } else { - // Don't refresh the last buffer for an ARF overlay frame. It will - // become the GF so preserve last as an alternative prediction option. 
- cpi->refresh_last_frame = 0; - } - } -} - -#if CONFIG_INTERNAL_STATS -extern double av1_get_blockiness(const unsigned char *img1, int img1_pitch, - const unsigned char *img2, int img2_pitch, - int width, int height); - -static void adjust_image_stat(double y, double u, double v, double all, - ImageStat *s) { - s->stat[STAT_Y] += y; - s->stat[STAT_U] += u; - s->stat[STAT_V] += v; - s->stat[STAT_ALL] += all; - s->worst = AOMMIN(s->worst, all); -} - -static void compute_internal_stats(AV1_COMP *cpi, int frame_bytes) { - AV1_COMMON *const cm = &cpi->common; - double samples = 0.0; - uint32_t in_bit_depth = 8; - uint32_t bit_depth = 8; - -#if CONFIG_INTER_STATS_ONLY - if (cm->frame_type == KEY_FRAME) return; // skip key frame -#endif - cpi->bytes += frame_bytes; - - if (cm->seq_params.use_highbitdepth) { - in_bit_depth = cpi->oxcf.input_bit_depth; - bit_depth = cm->seq_params.bit_depth; - } - if (cm->show_frame) { - const YV12_BUFFER_CONFIG *orig = cpi->source; - const YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show; - double y, u, v, frame_all; - - cpi->count++; - if (cpi->b_calculate_psnr) { - PSNR_STATS psnr; - double frame_ssim2 = 0.0, weight = 0.0; - aom_clear_system_state(); - // TODO(yaowu): unify these two versions into one. - aom_calc_highbd_psnr(orig, recon, &psnr, bit_depth, in_bit_depth); - - adjust_image_stat(psnr.psnr[1], psnr.psnr[2], psnr.psnr[3], psnr.psnr[0], - &cpi->psnr); - cpi->total_sq_error += psnr.sse[0]; - cpi->total_samples += psnr.samples[0]; - samples = psnr.samples[0]; - // TODO(yaowu): unify these two versions into one. 
- if (cm->seq_params.use_highbitdepth) - frame_ssim2 = - aom_highbd_calc_ssim(orig, recon, &weight, bit_depth, in_bit_depth); - else - frame_ssim2 = aom_calc_ssim(orig, recon, &weight); - - cpi->worst_ssim = AOMMIN(cpi->worst_ssim, frame_ssim2); - cpi->summed_quality += frame_ssim2 * weight; - cpi->summed_weights += weight; - -#if 0 - { - FILE *f = fopen("q_used.stt", "a"); - double y2 = psnr.psnr[1]; - double u2 = psnr.psnr[2]; - double v2 = psnr.psnr[3]; - double frame_psnr2 = psnr.psnr[0]; - fprintf(f, "%5d : Y%f7.3:U%f7.3:V%f7.3:F%f7.3:S%7.3f\n", - cm->current_video_frame, y2, u2, v2, - frame_psnr2, frame_ssim2); - fclose(f); - } -#endif - } - if (cpi->b_calculate_blockiness) { - if (!cm->seq_params.use_highbitdepth) { - const double frame_blockiness = - av1_get_blockiness(orig->y_buffer, orig->y_stride, recon->y_buffer, - recon->y_stride, orig->y_width, orig->y_height); - cpi->worst_blockiness = AOMMAX(cpi->worst_blockiness, frame_blockiness); - cpi->total_blockiness += frame_blockiness; - } - - if (cpi->b_calculate_consistency) { - if (!cm->seq_params.use_highbitdepth) { - const double this_inconsistency = aom_get_ssim_metrics( - orig->y_buffer, orig->y_stride, recon->y_buffer, recon->y_stride, - orig->y_width, orig->y_height, cpi->ssim_vars, &cpi->metrics, 1); - - const double peak = (double)((1 << in_bit_depth) - 1); - const double consistency = - aom_sse_to_psnr(samples, peak, cpi->total_inconsistency); - if (consistency > 0.0) - cpi->worst_consistency = - AOMMIN(cpi->worst_consistency, consistency); - cpi->total_inconsistency += this_inconsistency; - } - } - } - - frame_all = - aom_calc_fastssim(orig, recon, &y, &u, &v, bit_depth, in_bit_depth); - adjust_image_stat(y, u, v, frame_all, &cpi->fastssim); - frame_all = aom_psnrhvs(orig, recon, &y, &u, &v, bit_depth, in_bit_depth); - adjust_image_stat(y, u, v, frame_all, &cpi->psnrhvs); - } -} -#endif // CONFIG_INTERNAL_STATS - -static int is_integer_mv(AV1_COMP *cpi, const YV12_BUFFER_CONFIG *cur_picture, - 
const YV12_BUFFER_CONFIG *last_picture, - hash_table *last_hash_table) { - aom_clear_system_state(); - // check use hash ME - int k; - uint32_t hash_value_1; - uint32_t hash_value_2; - - const int block_size = 8; - const double threshold_current = 0.8; - const double threshold_average = 0.95; - const int max_history_size = 32; - int T = 0; // total block - int C = 0; // match with collocated block - int S = 0; // smooth region but not match with collocated block - int M = 0; // match with other block - - const int pic_width = cur_picture->y_width; - const int pic_height = cur_picture->y_height; - for (int i = 0; i + block_size <= pic_height; i += block_size) { - for (int j = 0; j + block_size <= pic_width; j += block_size) { - const int x_pos = j; - const int y_pos = i; - int match = 1; - T++; - - // check whether collocated block match with current - uint8_t *p_cur = cur_picture->y_buffer; - uint8_t *p_ref = last_picture->y_buffer; - int stride_cur = cur_picture->y_stride; - int stride_ref = last_picture->y_stride; - p_cur += (y_pos * stride_cur + x_pos); - p_ref += (y_pos * stride_ref + x_pos); - - if (cur_picture->flags & YV12_FLAG_HIGHBITDEPTH) { - uint16_t *p16_cur = CONVERT_TO_SHORTPTR(p_cur); - uint16_t *p16_ref = CONVERT_TO_SHORTPTR(p_ref); - for (int tmpY = 0; tmpY < block_size && match; tmpY++) { - for (int tmpX = 0; tmpX < block_size && match; tmpX++) { - if (p16_cur[tmpX] != p16_ref[tmpX]) { - match = 0; - } - } - p16_cur += stride_cur; - p16_ref += stride_ref; - } - } else { - for (int tmpY = 0; tmpY < block_size && match; tmpY++) { - for (int tmpX = 0; tmpX < block_size && match; tmpX++) { - if (p_cur[tmpX] != p_ref[tmpX]) { - match = 0; - } - } - p_cur += stride_cur; - p_ref += stride_ref; - } - } - - if (match) { - C++; - continue; - } - - if (av1_hash_is_horizontal_perfect(cur_picture, block_size, x_pos, - y_pos) || - av1_hash_is_vertical_perfect(cur_picture, block_size, x_pos, y_pos)) { - S++; - continue; - } - - av1_get_block_hash_value( - 
cur_picture->y_buffer + y_pos * stride_cur + x_pos, stride_cur, - block_size, &hash_value_1, &hash_value_2, - (cur_picture->flags & YV12_FLAG_HIGHBITDEPTH), &cpi->td.mb); - // Hashing does not work for highbitdepth currently. - // TODO(Roger): Make it work for highbitdepth. - if (av1_use_hash_me(&cpi->common)) { - if (av1_has_exact_match(last_hash_table, hash_value_1, hash_value_2)) { - M++; - } - } - } - } - - assert(T > 0); - double csm_rate = ((double)(C + S + M)) / ((double)(T)); - double m_rate = ((double)(M)) / ((double)(T)); - - cpi->csm_rate_array[cpi->rate_index] = csm_rate; - cpi->m_rate_array[cpi->rate_index] = m_rate; - - cpi->rate_index = (cpi->rate_index + 1) % max_history_size; - cpi->rate_size++; - cpi->rate_size = AOMMIN(cpi->rate_size, max_history_size); - - if (csm_rate < threshold_current) { - return 0; - } - - if (C == T) { - return 1; - } - - double csm_average = 0.0; - double m_average = 0.0; - - for (k = 0; k < cpi->rate_size; k++) { - csm_average += cpi->csm_rate_array[k]; - m_average += cpi->m_rate_array[k]; - } - csm_average /= cpi->rate_size; - m_average /= cpi->rate_size; - - if (csm_average < threshold_average) { - return 0; - } - - if (M > (T - C - S) / 3) { - return 1; - } - - if (csm_rate > 0.99 && m_rate > 0.01) { - return 1; - } - - if (csm_average + m_average > 1.01) { - return 1; - } - - return 0; -} - -int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags, - size_t *size, uint8_t *dest, int64_t *time_stamp, - int64_t *time_end, int flush, - const aom_rational_t *timebase) { - const AV1EncoderConfig *const oxcf = &cpi->oxcf; - AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - BufferPool *const pool = cm->buffer_pool; - RATE_CONTROL *const rc = &cpi->rc; - struct aom_usec_timer cmptimer; - YV12_BUFFER_CONFIG *force_src_buffer = NULL; - struct lookahead_entry *last_source = NULL; - struct lookahead_entry *source = NULL; - int arf_src_index; - int brf_src_index; - int i; - -#if 
CONFIG_BITSTREAM_DEBUG - assert(cpi->oxcf.max_threads == 0 && - "bitstream debug tool does not support multithreading"); - bitstream_queue_record_write(); - bitstream_queue_set_frame_write(cm->current_video_frame * 2 + cm->show_frame); -#endif - - cm->showable_frame = 0; - aom_usec_timer_start(&cmptimer); - - set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV, 0); - - // Normal defaults - cm->refresh_frame_context = oxcf->frame_parallel_decoding_mode - ? REFRESH_FRAME_CONTEXT_DISABLED - : REFRESH_FRAME_CONTEXT_BACKWARD; - if (oxcf->large_scale_tile) - cm->refresh_frame_context = REFRESH_FRAME_CONTEXT_DISABLED; - - // default reference buffers update config - av1_configure_buffer_updates_firstpass(cpi, LF_UPDATE); - - // Initialize fields related to forward keyframes - cpi->no_show_kf = 0; - cm->reset_decoder_state = 0; - - // Don't allow a show_existing_frame to coincide with an error resilient or - // S-Frame. An exception can be made in the case of a keyframe, since it - // does not depend on any previous frames. We must make this exception here - // because of the use of show_existing_frame with forward coded keyframes. - struct lookahead_entry *lookahead_src = NULL; - if (cm->current_video_frame > 0) - lookahead_src = av1_lookahead_peek(cpi->lookahead, 0); - - int use_show_existing = 1; - if (lookahead_src != NULL) { - const int is_error_resilient = - cpi->oxcf.error_resilient_mode || - (lookahead_src->flags & AOM_EFLAG_ERROR_RESILIENT); - const int is_s_frame = cpi->oxcf.s_frame_mode || - (lookahead_src->flags & AOM_EFLAG_SET_S_FRAME); - const int is_key_frame = - (rc->frames_to_key == 0) || (cpi->frame_flags & FRAMEFLAGS_KEY); - use_show_existing = !(is_error_resilient || is_s_frame) || is_key_frame; - } - - if (oxcf->pass == 2 && cm->show_existing_frame && use_show_existing) { - // Manage the source buffer and flush out the source frame that has been - // coded already; Also get prepared for PSNR calculation if needed. 
- if ((source = av1_lookahead_pop(cpi->lookahead, flush)) == NULL) { - *size = 0; - return -1; - } - av1_apply_encoding_flags(cpi, source->flags); - cpi->source = &source->img; - // TODO(zoeliu): To track down to determine whether it's needed to adjust - // the frame rate. - *time_stamp = source->ts_start; - *time_end = source->ts_end; - - // We need to adjust frame rate for an overlay frame - if (cpi->rc.is_src_frame_alt_ref) adjust_frame_rate(cpi, source); - - // Find a free buffer for the new frame, releasing the reference - // previously - // held. - if (cm->new_fb_idx != INVALID_IDX) { - --pool->frame_bufs[cm->new_fb_idx].ref_count; - } - cm->new_fb_idx = get_free_fb(cm); - - if (cm->new_fb_idx == INVALID_IDX) return -1; - - // Clear down mmx registers - aom_clear_system_state(); - - // Start with a 0 size frame. - *size = 0; - - // We need to update the gf_group for show_existing overlay frame - if (cpi->rc.is_src_frame_alt_ref) av1_rc_get_second_pass_params(cpi); - - if (Pass2Encode(cpi, size, dest, frame_flags) != AOM_CODEC_OK) - return AOM_CODEC_ERROR; - - if (cpi->b_calculate_psnr) generate_psnr_packet(cpi); - -#if CONFIG_INTERNAL_STATS - compute_internal_stats(cpi, (int)(*size)); -#endif // CONFIG_INTERNAL_STATS - - // Clear down mmx registers - aom_clear_system_state(); - - cm->show_existing_frame = 0; - return 0; - } - - // Should we encode an arf frame. - arf_src_index = get_arf_src_index(cpi); - if (arf_src_index) { - for (i = 0; i <= arf_src_index; ++i) { - struct lookahead_entry *e = av1_lookahead_peek(cpi->lookahead, i); - // Avoid creating an alt-ref if there's a forced keyframe pending. 
- if (e == NULL) { - break; - } else if (e->flags == AOM_EFLAG_FORCE_KF) { - arf_src_index = 0; - flush = 1; - break; - } - } - } - - if (arf_src_index) { - assert(arf_src_index <= rc->frames_to_key); - - if ((source = av1_lookahead_peek(cpi->lookahead, arf_src_index)) != NULL) { - cm->showable_frame = 1; - cpi->alt_ref_source = source; - // When arf_src_index == rc->frames_to_key, it indicates a fwd_kf - if (arf_src_index == rc->frames_to_key) { - // Skip temporal filtering and mark as intra_only if we have a fwd_kf - const GF_GROUP *const gf_group = &cpi->twopass.gf_group; - int which_arf = gf_group->arf_update_idx[gf_group->index]; - cpi->is_arf_filter_off[which_arf] = 1; - cpi->no_show_kf = 1; - } else { - if (oxcf->arnr_max_frames > 0) { - // Produce the filtered ARF frame. - av1_temporal_filter(cpi, arf_src_index); - aom_extend_frame_borders(&cpi->alt_ref_buffer, num_planes); - force_src_buffer = &cpi->alt_ref_buffer; - } - } - cm->show_frame = 0; - cm->intra_only = 0; - - if (oxcf->pass < 2) { - // In second pass, the buffer updates configure will be set - // in the function av1_rc_get_second_pass_params - av1_configure_buffer_updates_firstpass(cpi, ARF_UPDATE); - } - } - rc->source_alt_ref_pending = 0; - } - - // Should we encode an arf2 frame. - arf_src_index = get_arf2_src_index(cpi); - if (arf_src_index) { - for (i = 0; i <= arf_src_index; ++i) { - struct lookahead_entry *e = av1_lookahead_peek(cpi->lookahead, i); - // Avoid creating an alt-ref if there's a forced keyframe pending. - if (e == NULL) { - break; - } else if (e->flags == AOM_EFLAG_FORCE_KF) { - arf_src_index = 0; - flush = 1; - break; - } - } - } - - if (arf_src_index) { - assert(arf_src_index <= rc->frames_to_key); - - if ((source = av1_lookahead_peek(cpi->lookahead, arf_src_index)) != NULL) { - cm->showable_frame = 1; - cpi->alt_ref_source = source; - - if (oxcf->arnr_max_frames > 0) { - // Produce the filtered ARF frame. 
- av1_temporal_filter(cpi, arf_src_index); - aom_extend_frame_borders(&cpi->alt_ref_buffer, num_planes); - force_src_buffer = &cpi->alt_ref_buffer; - } - - cm->show_frame = 0; - cm->intra_only = 0; - - if (oxcf->pass < 2) { - // In second pass, the buffer updates configure will be set - // in the function av1_rc_get_second_pass_params - av1_configure_buffer_updates_firstpass(cpi, INTNL_ARF_UPDATE); - } - } - rc->source_alt_ref_pending = 0; - } - - rc->is_bwd_ref_frame = 0; - brf_src_index = get_brf_src_index(cpi); - if (brf_src_index) { - assert(brf_src_index <= rc->frames_to_key); - if ((source = av1_lookahead_peek(cpi->lookahead, brf_src_index)) != NULL) { - cm->showable_frame = 1; - cm->show_frame = 0; - cm->intra_only = 0; - - if (oxcf->pass < 2) { - // In second pass, the buffer updates configure will be set - // in the function av1_rc_get_second_pass_params - av1_configure_buffer_updates_firstpass(cpi, BIPRED_UPDATE); - } - } - } - - if (!source) { - // Get last frame source. - if (cm->current_video_frame > 0) { - if ((last_source = av1_lookahead_peek(cpi->lookahead, -1)) == NULL) - return -1; - } - if (cm->current_video_frame > 0) assert(last_source != NULL); - // Read in the source frame. - source = av1_lookahead_pop(cpi->lookahead, flush); - - if (source != NULL) { - cm->show_frame = 1; - cm->intra_only = 0; - - // Check to see if the frame should be encoded as an arf overlay. - check_src_altref(cpi, source); - } - } - if (source) { - cpi->unscaled_source = cpi->source = - force_src_buffer ? force_src_buffer : &source->img; - cpi->unscaled_last_source = last_source != NULL ? &last_source->img : NULL; - - *time_stamp = source->ts_start; - *time_end = source->ts_end; - av1_apply_encoding_flags(cpi, source->flags); - *frame_flags = (source->flags & AOM_EFLAG_FORCE_KF) ? 
FRAMEFLAGS_KEY : 0; - - } else { - *size = 0; - if (flush && oxcf->pass == 1 && !cpi->twopass.first_pass_done) { - av1_end_first_pass(cpi); /* get last stats packet */ - cpi->twopass.first_pass_done = 1; - } - return -1; - } - - if (source->ts_start < cpi->first_time_stamp_ever) { - cpi->first_time_stamp_ever = source->ts_start; - cpi->last_end_time_stamp_seen = source->ts_start; - } - - // Clear down mmx registers - aom_clear_system_state(); - - // adjust frame rates based on timestamps given - if (cm->show_frame) adjust_frame_rate(cpi, source); - - // Find a free buffer for the new frame, releasing the reference previously - // held. - if (cm->new_fb_idx != INVALID_IDX) { - --pool->frame_bufs[cm->new_fb_idx].ref_count; - } - cm->new_fb_idx = get_free_fb(cm); - - if (cm->new_fb_idx == INVALID_IDX) return -1; - - // Retain the RF_LEVEL for the current newly coded frame. - cpi->frame_rf_level[cm->new_fb_idx] = - cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index]; - - cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx]; - cm->cur_frame->buf.buf_8bit_valid = 0; - - if (cpi->film_grain_table) { - cm->seq_params.film_grain_params_present = aom_film_grain_table_lookup( - cpi->film_grain_table, *time_stamp, *time_end, 0 /* =erase */, - &cm->film_grain_params); - } - cm->cur_frame->film_grain_params_present = - cm->seq_params.film_grain_params_present; - - // only one operating point supported now - const int64_t pts64 = ticks_to_timebase_units(timebase, *time_stamp); - if (pts64 < 0 || pts64 > UINT32_MAX) return AOM_CODEC_ERROR; - cpi->common.frame_presentation_time = (uint32_t)pts64; - - // Start with a 0 size frame. 
- *size = 0; - - cpi->frame_flags = *frame_flags; - - if (oxcf->pass == 2) { - av1_rc_get_second_pass_params(cpi); - } else if (oxcf->pass == 1) { - setup_frame_size(cpi); - } - - if (cpi->oxcf.pass != 0 || frame_is_intra_only(cm) == 1) { - for (i = 0; i < REF_FRAMES; ++i) cpi->scaled_ref_idx[i] = INVALID_IDX; - } - - cm->using_qmatrix = cpi->oxcf.using_qm; - cm->min_qmlevel = cpi->oxcf.qm_minlevel; - cm->max_qmlevel = cpi->oxcf.qm_maxlevel; - - if (cm->seq_params.frame_id_numbers_present_flag) { - if (*time_stamp == 0) { - cpi->common.current_frame_id = -1; - } - } - - cpi->cur_poc++; - if (oxcf->pass != 1 && cpi->common.allow_screen_content_tools && - !frame_is_intra_only(cm)) { - if (cpi->common.seq_params.force_integer_mv == 2) { - struct lookahead_entry *previous_entry = - av1_lookahead_peek(cpi->lookahead, cpi->previous_index); - if (!previous_entry) - cpi->common.cur_frame_force_integer_mv = 0; - else - cpi->common.cur_frame_force_integer_mv = is_integer_mv( - cpi, cpi->source, &previous_entry->img, cpi->previous_hash_table); - } else { - cpi->common.cur_frame_force_integer_mv = - cpi->common.seq_params.force_integer_mv; - } - } else { - cpi->common.cur_frame_force_integer_mv = 0; - } - - if (oxcf->pass == 1) { - cpi->td.mb.e_mbd.lossless[0] = is_lossless_requested(oxcf); - av1_first_pass(cpi, source); - } else if (oxcf->pass == 2) { - if (Pass2Encode(cpi, size, dest, frame_flags) != AOM_CODEC_OK) - return AOM_CODEC_ERROR; - } else { - // One pass encode - if (Pass0Encode(cpi, size, dest, 0, frame_flags) != AOM_CODEC_OK) - return AOM_CODEC_ERROR; - } - if (oxcf->pass != 1 && cpi->common.allow_screen_content_tools) { - cpi->previous_hash_table = &cm->cur_frame->hash_table; - { - int l; - for (l = -MAX_PRE_FRAMES; l < cpi->lookahead->max_sz; l++) { - if ((cpi->lookahead->buf + l) == source) { - cpi->previous_index = l; - break; - } - } - - if (l == cpi->lookahead->max_sz) { - aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR, - "Failed to find last frame 
original buffer"); - } - } - } - - if (!cm->large_scale_tile) { - cm->frame_contexts[cm->new_fb_idx] = *cm->fc; - } - -#define EXT_TILE_DEBUG 0 -#if EXT_TILE_DEBUG - if (cm->large_scale_tile && oxcf->pass == 2) { - char fn[20] = "./fc"; - fn[4] = cm->current_video_frame / 100 + '0'; - fn[5] = (cm->current_video_frame % 100) / 10 + '0'; - fn[6] = (cm->current_video_frame % 10) + '0'; - fn[7] = '\0'; - av1_print_frame_contexts(cm->fc, fn); - } -#endif // EXT_TILE_DEBUG -#undef EXT_TILE_DEBUG - - cm->showable_frame = !cm->show_frame && cm->showable_frame; - - // No frame encoded, or frame was dropped, release scaled references. - if ((*size == 0) && (frame_is_intra_only(cm) == 0)) { - release_scaled_references(cpi); - } - - if (*size > 0) { - cpi->droppable = !frame_is_reference(cpi); - } - - aom_usec_timer_mark(&cmptimer); - cpi->time_compress_data += aom_usec_timer_elapsed(&cmptimer); - - if (cpi->b_calculate_psnr && oxcf->pass != 1 && cm->show_frame) - generate_psnr_packet(cpi); - -#if CONFIG_INTERNAL_STATS - if (oxcf->pass != 1) { - compute_internal_stats(cpi, (int)(*size)); - } -#endif // CONFIG_INTERNAL_STATS - - aom_clear_system_state(); - - return 0; -} - -int av1_get_preview_raw_frame(AV1_COMP *cpi, YV12_BUFFER_CONFIG *dest) { - AV1_COMMON *cm = &cpi->common; - if (!cm->show_frame) { - return -1; - } else { - int ret; - if (cm->frame_to_show) { - *dest = *cm->frame_to_show; - dest->y_width = cm->width; - dest->y_height = cm->height; - dest->uv_width = cm->width >> cm->seq_params.subsampling_x; - dest->uv_height = cm->height >> cm->seq_params.subsampling_y; - ret = 0; - } else { - ret = -1; - } - aom_clear_system_state(); - return ret; - } -} - -int av1_get_last_show_frame(AV1_COMP *cpi, YV12_BUFFER_CONFIG *frame) { - if (cpi->last_show_frame_buf_idx == INVALID_IDX) return -1; - - *frame = - cpi->common.buffer_pool->frame_bufs[cpi->last_show_frame_buf_idx].buf; - return 0; -} - -static int equal_dimensions_and_border(const YV12_BUFFER_CONFIG *a, - const 
YV12_BUFFER_CONFIG *b) { - return a->y_height == b->y_height && a->y_width == b->y_width && - a->uv_height == b->uv_height && a->uv_width == b->uv_width && - a->y_stride == b->y_stride && a->uv_stride == b->uv_stride && - a->border == b->border && - (a->flags & YV12_FLAG_HIGHBITDEPTH) == - (b->flags & YV12_FLAG_HIGHBITDEPTH); -} - -aom_codec_err_t av1_copy_new_frame_enc(AV1_COMMON *cm, - YV12_BUFFER_CONFIG *new_frame, - YV12_BUFFER_CONFIG *sd) { - const int num_planes = av1_num_planes(cm); - if (!equal_dimensions_and_border(new_frame, sd)) - aom_internal_error(&cm->error, AOM_CODEC_ERROR, - "Incorrect buffer dimensions"); - else - aom_yv12_copy_frame(new_frame, sd, num_planes); - - return cm->error.error_code; -} - -int av1_set_internal_size(AV1_COMP *cpi, AOM_SCALING horiz_mode, - AOM_SCALING vert_mode) { - int hr = 0, hs = 0, vr = 0, vs = 0; - - if (horiz_mode > ONETWO || vert_mode > ONETWO) return -1; - - Scale2Ratio(horiz_mode, &hr, &hs); - Scale2Ratio(vert_mode, &vr, &vs); - - // always go to the next whole number - cpi->resize_pending_width = (hs - 1 + cpi->oxcf.width * hr) / hs; - cpi->resize_pending_height = (vs - 1 + cpi->oxcf.height * vr) / vs; - - return 0; -} - -int av1_get_quantizer(AV1_COMP *cpi) { return cpi->common.base_qindex; } - -int av1_convert_sect5obus_to_annexb(uint8_t *buffer, size_t *frame_size) { - size_t output_size = 0; - size_t total_bytes_read = 0; - size_t remaining_size = *frame_size; - uint8_t *buff_ptr = buffer; - - // go through each OBUs - while (total_bytes_read < *frame_size) { - uint8_t saved_obu_header[2]; - uint64_t obu_payload_size; - size_t length_of_payload_size; - size_t length_of_obu_size; - uint32_t obu_header_size = (buff_ptr[0] >> 2) & 0x1 ? 
2 : 1; - size_t obu_bytes_read = obu_header_size; // bytes read for current obu - - // save the obu header (1 or 2 bytes) - memmove(saved_obu_header, buff_ptr, obu_header_size); - // clear the obu_has_size_field - saved_obu_header[0] = saved_obu_header[0] & (~0x2); - - // get the payload_size and length of payload_size - if (aom_uleb_decode(buff_ptr + obu_header_size, remaining_size, - &obu_payload_size, &length_of_payload_size) != 0) { - return AOM_CODEC_ERROR; - } - obu_bytes_read += length_of_payload_size; - - // calculate the length of size of the obu header plus payload - length_of_obu_size = - aom_uleb_size_in_bytes((uint64_t)(obu_header_size + obu_payload_size)); - - // move the rest of data to new location - memmove(buff_ptr + length_of_obu_size + obu_header_size, - buff_ptr + obu_bytes_read, remaining_size - obu_bytes_read); - obu_bytes_read += (size_t)obu_payload_size; - - // write the new obu size - const uint64_t obu_size = obu_header_size + obu_payload_size; - size_t coded_obu_size; - if (aom_uleb_encode(obu_size, sizeof(obu_size), buff_ptr, - &coded_obu_size) != 0) { - return AOM_CODEC_ERROR; - } - - // write the saved (modified) obu_header following obu size - memmove(buff_ptr + length_of_obu_size, saved_obu_header, obu_header_size); - - total_bytes_read += obu_bytes_read; - remaining_size -= obu_bytes_read; - buff_ptr += length_of_obu_size + obu_size; - output_size += length_of_obu_size + (size_t)obu_size; - } - - *frame_size = output_size; - return AOM_CODEC_OK; -} - -void av1_apply_encoding_flags(AV1_COMP *cpi, aom_enc_frame_flags_t flags) { - // TODO(yunqingwang): For what references to use, external encoding flags - // should be consistent with internal reference frame selection. Need to - // ensure that there is not conflict between the two. In AV1 encoder, the - // priority rank for 7 reference frames are: LAST, ALTREF, LAST2, LAST3, - // GOLDEN, BWDREF, ALTREF2. If only one reference frame is used, it must be - // LAST. 
- cpi->ext_ref_frame_flags = AOM_REFFRAME_ALL; - if (flags & - (AOM_EFLAG_NO_REF_LAST | AOM_EFLAG_NO_REF_LAST2 | AOM_EFLAG_NO_REF_LAST3 | - AOM_EFLAG_NO_REF_GF | AOM_EFLAG_NO_REF_ARF | AOM_EFLAG_NO_REF_BWD | - AOM_EFLAG_NO_REF_ARF2)) { - if (flags & AOM_EFLAG_NO_REF_LAST) { - cpi->ext_ref_frame_flags = 0; - } else { - int ref = AOM_REFFRAME_ALL; - - if (flags & AOM_EFLAG_NO_REF_LAST2) ref ^= AOM_LAST2_FLAG; - if (flags & AOM_EFLAG_NO_REF_LAST3) ref ^= AOM_LAST3_FLAG; - - if (flags & AOM_EFLAG_NO_REF_GF) ref ^= AOM_GOLD_FLAG; - - if (flags & AOM_EFLAG_NO_REF_ARF) { - ref ^= AOM_ALT_FLAG; - ref ^= AOM_BWD_FLAG; - ref ^= AOM_ALT2_FLAG; - } else { - if (flags & AOM_EFLAG_NO_REF_BWD) ref ^= AOM_BWD_FLAG; - if (flags & AOM_EFLAG_NO_REF_ARF2) ref ^= AOM_ALT2_FLAG; - } - - av1_use_as_reference(cpi, ref); - } - } - - if (flags & - (AOM_EFLAG_NO_UPD_LAST | AOM_EFLAG_NO_UPD_GF | AOM_EFLAG_NO_UPD_ARF)) { - int upd = AOM_REFFRAME_ALL; - - // Refreshing LAST/LAST2/LAST3 is handled by 1 common flag. - if (flags & AOM_EFLAG_NO_UPD_LAST) upd ^= AOM_LAST_FLAG; - - if (flags & AOM_EFLAG_NO_UPD_GF) upd ^= AOM_GOLD_FLAG; - - if (flags & AOM_EFLAG_NO_UPD_ARF) { - upd ^= AOM_ALT_FLAG; - upd ^= AOM_BWD_FLAG; - upd ^= AOM_ALT2_FLAG; - } - - av1_update_reference(cpi, upd); - } - - cpi->ext_use_ref_frame_mvs = cpi->oxcf.allow_ref_frame_mvs & - ((flags & AOM_EFLAG_NO_REF_FRAME_MVS) == 0); - cpi->ext_use_error_resilient = cpi->oxcf.error_resilient_mode | - ((flags & AOM_EFLAG_ERROR_RESILIENT) != 0); - cpi->ext_use_s_frame = - cpi->oxcf.s_frame_mode | ((flags & AOM_EFLAG_SET_S_FRAME) != 0); - cpi->ext_use_primary_ref_none = (flags & AOM_EFLAG_SET_PRIMARY_REF_NONE) != 0; - - if (flags & AOM_EFLAG_NO_UPD_ENTROPY) { - av1_update_entropy(cpi, 0); - } -} - -int64_t timebase_units_to_ticks(const aom_rational_t *timebase, int64_t n) { - return n * TICKS_PER_SEC * timebase->num / timebase->den; -} - -int64_t ticks_to_timebase_units(const aom_rational_t *timebase, int64_t n) { - const int64_t round = 
TICKS_PER_SEC * timebase->num / 2 - 1; - return (n * timebase->den + round) / timebase->num / TICKS_PER_SEC; -} - -aom_fixed_buf_t *av1_get_global_headers(AV1_COMP *cpi) { - if (!cpi) return NULL; - - uint8_t header_buf[512] = { 0 }; - const uint32_t sequence_header_size = - write_sequence_header_obu(cpi, &header_buf[0]); - assert(sequence_header_size <= sizeof(header_buf)); - if (sequence_header_size == 0) return NULL; - - const size_t obu_header_size = 1; - const size_t size_field_size = aom_uleb_size_in_bytes(sequence_header_size); - const size_t payload_offset = obu_header_size + size_field_size; - - if (payload_offset + sequence_header_size > sizeof(header_buf)) return NULL; - memmove(&header_buf[payload_offset], &header_buf[0], sequence_header_size); - - if (write_obu_header(OBU_SEQUENCE_HEADER, 0, &header_buf[0]) != - obu_header_size) { - return NULL; - } - - size_t coded_size_field_size = 0; - if (aom_uleb_encode(sequence_header_size, size_field_size, - &header_buf[obu_header_size], - &coded_size_field_size) != 0) { - return NULL; - } - assert(coded_size_field_size == size_field_size); - - aom_fixed_buf_t *global_headers = - (aom_fixed_buf_t *)malloc(sizeof(*global_headers)); - if (!global_headers) return NULL; - - const size_t global_header_buf_size = - obu_header_size + size_field_size + sequence_header_size; - - global_headers->buf = malloc(global_header_buf_size); - if (!global_headers->buf) { - free(global_headers); - return NULL; - } - - memcpy(global_headers->buf, &header_buf[0], global_header_buf_size); - global_headers->sz = global_header_buf_size; - return global_headers; -} diff --git a/third_party/aom/av1/encoder/encoder.h b/third_party/aom/av1/encoder/encoder.h deleted file mode 100644 index ee7fc4637..000000000 --- a/third_party/aom/av1/encoder/encoder.h +++ /dev/null @@ -1,985 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. 
All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_ENCODER_ENCODER_H_ -#define AOM_AV1_ENCODER_ENCODER_H_ - -#include - -#include "config/aom_config.h" - -#include "aom/aomcx.h" - -#include "av1/common/alloccommon.h" -#include "av1/common/entropymode.h" -#include "av1/common/thread_common.h" -#include "av1/common/onyxc_int.h" -#include "av1/common/resize.h" -#include "av1/common/timing.h" -#include "av1/encoder/aq_cyclicrefresh.h" -#include "av1/encoder/av1_quantize.h" -#include "av1/encoder/context_tree.h" -#include "av1/encoder/encodemb.h" -#include "av1/encoder/firstpass.h" -#include "av1/encoder/lookahead.h" -#include "av1/encoder/mbgraph.h" -#include "av1/encoder/mcomp.h" -#include "av1/encoder/ratectrl.h" -#include "av1/encoder/rd.h" -#include "av1/encoder/speed_features.h" -#include "av1/encoder/tokenize.h" - -#if CONFIG_INTERNAL_STATS -#include "aom_dsp/ssim.h" -#endif -#include "aom_dsp/variance.h" -#if CONFIG_DENOISE -#include "aom_dsp/noise_model.h" -#endif -#include "aom/internal/aom_codec_internal.h" -#include "aom_util/aom_thread.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - int nmv_vec_cost[MV_JOINTS]; - int nmv_costs[2][MV_VALS]; - int nmv_costs_hp[2][MV_VALS]; - - FRAME_CONTEXT fc; -} CODING_CONTEXT; - -typedef enum { - // regular inter frame - REGULAR_FRAME = 0, - // alternate reference frame - ARF_FRAME = 1, - // overlay frame - OVERLAY_FRAME = 2, - // golden frame - GLD_FRAME = 3, - // backward reference frame - BRF_FRAME = 4, - // extra alternate reference frame - 
EXT_ARF_FRAME = 5, - FRAME_CONTEXT_INDEXES -} FRAME_CONTEXT_INDEX; - -typedef enum { - NORMAL = 0, - FOURFIVE = 1, - THREEFIVE = 2, - ONETWO = 3 -} AOM_SCALING; - -typedef enum { - // Good Quality Fast Encoding. The encoder balances quality with the amount of - // time it takes to encode the output. Speed setting controls how fast. - GOOD -} MODE; - -typedef enum { - FRAMEFLAGS_KEY = 1 << 0, - FRAMEFLAGS_GOLDEN = 1 << 1, - FRAMEFLAGS_BWDREF = 1 << 2, - // TODO(zoeliu): To determine whether a frame flag is needed for ALTREF2_FRAME - FRAMEFLAGS_ALTREF = 1 << 3, -} FRAMETYPE_FLAGS; - -typedef enum { - NO_AQ = 0, - VARIANCE_AQ = 1, - COMPLEXITY_AQ = 2, - CYCLIC_REFRESH_AQ = 3, - AQ_MODE_COUNT // This should always be the last member of the enum -} AQ_MODE; -typedef enum { - NO_DELTA_Q = 0, - DELTA_Q_ONLY = 1, - DELTA_Q_LF = 2, - DELTAQ_MODE_COUNT // This should always be the last member of the enum -} DELTAQ_MODE; - -typedef enum { - RESIZE_NONE = 0, // No frame resizing allowed. - RESIZE_FIXED = 1, // All frames are coded at the specified scale. - RESIZE_RANDOM = 2, // All frames are coded at a random scale. - RESIZE_MODES -} RESIZE_MODE; - -typedef enum { - SUPERRES_NONE = 0, // No frame superres allowed - SUPERRES_FIXED = 1, // All frames are coded at the specified scale, - // and super-resolved. - SUPERRES_RANDOM = 2, // All frames are coded at a random scale, - // and super-resolved. - SUPERRES_QTHRESH = 3, // Superres scale for a frame is determined based on - // q_index - SUPERRES_MODES -} SUPERRES_MODE; - -typedef struct AV1EncoderConfig { - BITSTREAM_PROFILE profile; - aom_bit_depth_t bit_depth; // Codec bit-depth. - int width; // width of data passed to the compressor - int height; // height of data passed to the compressor - int forced_max_frame_width; // forced maximum width of frame (if != 0) - int forced_max_frame_height; // forced maximum height of frame (if != 0) - unsigned int input_bit_depth; // Input bit depth. 
- double init_framerate; // set to passed in framerate - int64_t target_bandwidth; // bandwidth to be used in bits per second - - int noise_sensitivity; // pre processing blur: recommendation 0 - int sharpness; // sharpening output: recommendation 0: - int speed; - // maximum allowed bitrate for any intra frame in % of bitrate target. - unsigned int rc_max_intra_bitrate_pct; - // maximum allowed bitrate for any inter frame in % of bitrate target. - unsigned int rc_max_inter_bitrate_pct; - // percent of rate boost for golden frame in CBR mode. - unsigned int gf_cbr_boost_pct; - - MODE mode; - int pass; - - // Key Framing Operations - int auto_key; // autodetect cut scenes and set the keyframes - int key_freq; // maximum distance to key frame. - int sframe_dist; - int sframe_mode; - int sframe_enabled; - int lag_in_frames; // how many frames lag before we start encoding - int fwd_kf_enabled; - - // ---------------------------------------------------------------- - // DATARATE CONTROL OPTIONS - - // vbr, cbr, constrained quality or constant quality - enum aom_rc_mode rc_mode; - - // buffer targeting aggressiveness - int under_shoot_pct; - int over_shoot_pct; - - // buffering parameters - int64_t starting_buffer_level_ms; - int64_t optimal_buffer_level_ms; - int64_t maximum_buffer_size_ms; - - // Frame drop threshold. - int drop_frames_water_mark; - - // controlling quality - int fixed_q; - int worst_allowed_q; - int best_allowed_q; - int cq_level; - AQ_MODE aq_mode; // Adaptive Quantization mode - DELTAQ_MODE deltaq_mode; - int enable_cdef; - int enable_restoration; - int disable_trellis_quant; - int using_qm; - int qm_y; - int qm_u; - int qm_v; - int qm_minlevel; - int qm_maxlevel; -#if CONFIG_DIST_8X8 - int using_dist_8x8; -#endif - unsigned int num_tile_groups; - unsigned int mtu; - - // Internal frame size scaling. - RESIZE_MODE resize_mode; - uint8_t resize_scale_denominator; - uint8_t resize_kf_scale_denominator; - - // Frame Super-Resolution size scaling. 
- SUPERRES_MODE superres_mode; - uint8_t superres_scale_denominator; - uint8_t superres_kf_scale_denominator; - int superres_qthresh; - int superres_kf_qthresh; - - // Enable feature to reduce the frame quantization every x frames. - int frame_periodic_boost; - - // two pass datarate control - int two_pass_vbrbias; // two pass datarate control tweaks - int two_pass_vbrmin_section; - int two_pass_vbrmax_section; - // END DATARATE CONTROL OPTIONS - // ---------------------------------------------------------------- - - int enable_auto_arf; - int enable_auto_brf; // (b)ackward (r)ef (f)rame - - /* Bitfield defining the error resiliency features to enable. - * Can provide decodable frames after losses in previous - * frames and decodable partitions after losses in the same frame. - */ - unsigned int error_resilient_mode; - - unsigned int s_frame_mode; - - /* Bitfield defining the parallel decoding mode where the - * decoding in successive frames may be conducted in parallel - * just by decoding the frame headers. 
- */ - unsigned int frame_parallel_decoding_mode; - - unsigned int limit; - - int arnr_max_frames; - int arnr_strength; - - int min_gf_interval; - int max_gf_interval; - - int row_mt; - int tile_columns; - int tile_rows; - int tile_width_count; - int tile_height_count; - int tile_widths[MAX_TILE_COLS]; - int tile_heights[MAX_TILE_ROWS]; - - int max_threads; - - aom_fixed_buf_t two_pass_stats_in; - struct aom_codec_pkt_list *output_pkt_list; - -#if CONFIG_FP_MB_STATS - aom_fixed_buf_t firstpass_mb_stats_in; -#endif - - aom_tune_metric tuning; - aom_tune_content content; - int use_highbitdepth; - aom_color_primaries_t color_primaries; - aom_transfer_characteristics_t transfer_characteristics; - aom_matrix_coefficients_t matrix_coefficients; - aom_chroma_sample_position_t chroma_sample_position; - int color_range; - int render_width; - int render_height; - aom_timing_info_type_t timing_info_type; - int timing_info_present; - aom_timing_info_t timing_info; - int decoder_model_info_present_flag; - int display_model_info_present_flag; - int buffer_removal_time_present; - aom_dec_model_info_t buffer_model; - aom_dec_model_op_parameters_t op_params[MAX_NUM_OPERATING_POINTS + 1]; - aom_op_timing_info_t op_frame_timing[MAX_NUM_OPERATING_POINTS + 1]; - int film_grain_test_vector; - const char *film_grain_table_filename; - - uint8_t cdf_update_mode; - aom_superblock_size_t superblock_size; - unsigned int large_scale_tile; - unsigned int single_tile_decoding; - int monochrome; - unsigned int full_still_picture_hdr; - int enable_dual_filter; - unsigned int motion_vector_unit_test; - const cfg_options_t *cfg; - int enable_order_hint; - int enable_jnt_comp; - int enable_ref_frame_mvs; - unsigned int allow_ref_frame_mvs; - int enable_warped_motion; - int allow_warped_motion; - int enable_superres; - unsigned int save_as_annexb; - -#if CONFIG_DENOISE - float noise_level; - int noise_block_size; -#endif - - unsigned int chroma_subsampling_x; - unsigned int chroma_subsampling_y; -} 
AV1EncoderConfig; - -static INLINE int is_lossless_requested(const AV1EncoderConfig *cfg) { - return cfg->best_allowed_q == 0 && cfg->worst_allowed_q == 0; -} - -typedef struct FRAME_COUNTS { -// Note: This structure should only contain 'unsigned int' fields, or -// aggregates built solely from 'unsigned int' fields/elements -#if CONFIG_ENTROPY_STATS - unsigned int kf_y_mode[KF_MODE_CONTEXTS][KF_MODE_CONTEXTS][INTRA_MODES]; - unsigned int angle_delta[DIRECTIONAL_MODES][2 * MAX_ANGLE_DELTA + 1]; - unsigned int y_mode[BLOCK_SIZE_GROUPS][INTRA_MODES]; - unsigned int uv_mode[CFL_ALLOWED_TYPES][INTRA_MODES][UV_INTRA_MODES]; - unsigned int cfl_sign[CFL_JOINT_SIGNS]; - unsigned int cfl_alpha[CFL_ALPHA_CONTEXTS][CFL_ALPHABET_SIZE]; - unsigned int palette_y_mode[PALATTE_BSIZE_CTXS][PALETTE_Y_MODE_CONTEXTS][2]; - unsigned int palette_uv_mode[PALETTE_UV_MODE_CONTEXTS][2]; - unsigned int palette_y_size[PALATTE_BSIZE_CTXS][PALETTE_SIZES]; - unsigned int palette_uv_size[PALATTE_BSIZE_CTXS][PALETTE_SIZES]; - unsigned int palette_y_color_index[PALETTE_SIZES] - [PALETTE_COLOR_INDEX_CONTEXTS] - [PALETTE_COLORS]; - unsigned int palette_uv_color_index[PALETTE_SIZES] - [PALETTE_COLOR_INDEX_CONTEXTS] - [PALETTE_COLORS]; - unsigned int partition[PARTITION_CONTEXTS][EXT_PARTITION_TYPES]; - unsigned int txb_skip[TOKEN_CDF_Q_CTXS][TX_SIZES][TXB_SKIP_CONTEXTS][2]; - unsigned int eob_extra[TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES] - [EOB_COEF_CONTEXTS][2]; - unsigned int dc_sign[PLANE_TYPES][DC_SIGN_CONTEXTS][2]; - unsigned int coeff_lps[TX_SIZES][PLANE_TYPES][BR_CDF_SIZE - 1][LEVEL_CONTEXTS] - [2]; - unsigned int eob_flag[TX_SIZES][PLANE_TYPES][EOB_COEF_CONTEXTS][2]; - unsigned int eob_multi16[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][5]; - unsigned int eob_multi32[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][6]; - unsigned int eob_multi64[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][7]; - unsigned int eob_multi128[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][8]; - unsigned int eob_multi256[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][9]; - 
unsigned int eob_multi512[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][10]; - unsigned int eob_multi1024[TOKEN_CDF_Q_CTXS][PLANE_TYPES][2][11]; - unsigned int coeff_lps_multi[TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES] - [LEVEL_CONTEXTS][BR_CDF_SIZE]; - unsigned int coeff_base_multi[TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES] - [SIG_COEF_CONTEXTS][NUM_BASE_LEVELS + 2]; - unsigned int coeff_base_eob_multi[TOKEN_CDF_Q_CTXS][TX_SIZES][PLANE_TYPES] - [SIG_COEF_CONTEXTS_EOB][NUM_BASE_LEVELS + 1]; - unsigned int newmv_mode[NEWMV_MODE_CONTEXTS][2]; - unsigned int zeromv_mode[GLOBALMV_MODE_CONTEXTS][2]; - unsigned int refmv_mode[REFMV_MODE_CONTEXTS][2]; - unsigned int drl_mode[DRL_MODE_CONTEXTS][2]; - unsigned int inter_compound_mode[INTER_MODE_CONTEXTS][INTER_COMPOUND_MODES]; - unsigned int wedge_idx[BLOCK_SIZES_ALL][16]; - unsigned int interintra[BLOCK_SIZE_GROUPS][2]; - unsigned int interintra_mode[BLOCK_SIZE_GROUPS][INTERINTRA_MODES]; - unsigned int wedge_interintra[BLOCK_SIZES_ALL][2]; - unsigned int compound_type[BLOCK_SIZES_ALL][COMPOUND_TYPES - 1]; - unsigned int motion_mode[BLOCK_SIZES_ALL][MOTION_MODES]; - unsigned int obmc[BLOCK_SIZES_ALL][2]; - unsigned int intra_inter[INTRA_INTER_CONTEXTS][2]; - unsigned int comp_inter[COMP_INTER_CONTEXTS][2]; - unsigned int comp_ref_type[COMP_REF_TYPE_CONTEXTS][2]; - unsigned int uni_comp_ref[UNI_COMP_REF_CONTEXTS][UNIDIR_COMP_REFS - 1][2]; - unsigned int single_ref[REF_CONTEXTS][SINGLE_REFS - 1][2]; - unsigned int comp_ref[REF_CONTEXTS][FWD_REFS - 1][2]; - unsigned int comp_bwdref[REF_CONTEXTS][BWD_REFS - 1][2]; - unsigned int intrabc[2]; - - unsigned int txfm_partition[TXFM_PARTITION_CONTEXTS][2]; - unsigned int intra_tx_size[MAX_TX_CATS][TX_SIZE_CONTEXTS][MAX_TX_DEPTH + 1]; - unsigned int skip_mode[SKIP_MODE_CONTEXTS][2]; - unsigned int skip[SKIP_CONTEXTS][2]; - unsigned int compound_index[COMP_INDEX_CONTEXTS][2]; - unsigned int comp_group_idx[COMP_GROUP_IDX_CONTEXTS][2]; - unsigned int delta_q[DELTA_Q_PROBS][2]; - unsigned int 
delta_lf_multi[FRAME_LF_COUNT][DELTA_LF_PROBS][2]; - unsigned int delta_lf[DELTA_LF_PROBS][2]; - - unsigned int inter_ext_tx[EXT_TX_SETS_INTER][EXT_TX_SIZES][TX_TYPES]; - unsigned int intra_ext_tx[EXT_TX_SETS_INTRA][EXT_TX_SIZES][INTRA_MODES] - [TX_TYPES]; - unsigned int filter_intra_mode[FILTER_INTRA_MODES]; - unsigned int filter_intra[BLOCK_SIZES_ALL][2]; - unsigned int switchable_restore[RESTORE_SWITCHABLE_TYPES]; - unsigned int wiener_restore[2]; - unsigned int sgrproj_restore[2]; -#endif // CONFIG_ENTROPY_STATS - - unsigned int switchable_interp[SWITCHABLE_FILTER_CONTEXTS] - [SWITCHABLE_FILTERS]; -} FRAME_COUNTS; - -#if CONFIG_COLLECT_INTER_MODE_RD_STATS -#define INTER_MODE_RD_DATA_OVERALL_SIZE 6400 - -typedef struct { - int ready; - double a; - double b; - double dist_mean; - double ld_mean; - double sse_mean; - double sse_sse_mean; - double sse_ld_mean; - int num; - double dist_sum; - double ld_sum; - double sse_sum; - double sse_sse_sum; - double sse_ld_sum; -} InterModeRdModel; - -typedef struct { - int idx; - int64_t rd; -} RdIdxPair; -// TODO(angiebird): This is an estimated size. We still need to figure what is -// the maximum number of modes. -#define MAX_INTER_MODES 1024 -typedef struct inter_modes_info { - int num; - MB_MODE_INFO mbmi_arr[MAX_INTER_MODES]; - int mode_rate_arr[MAX_INTER_MODES]; - int64_t sse_arr[MAX_INTER_MODES]; - int64_t est_rd_arr[MAX_INTER_MODES]; - RdIdxPair rd_idx_pair_arr[MAX_INTER_MODES]; -} InterModesInfo; -#endif - -// TODO(jingning) All spatially adaptive variables should go to TileDataEnc. 
-typedef struct TileDataEnc { - TileInfo tile_info; - int thresh_freq_fact[BLOCK_SIZES_ALL][MAX_MODES]; - int mode_map[BLOCK_SIZES_ALL][MAX_MODES]; - int m_search_count; - int ex_search_count; - CFL_CTX cfl; - DECLARE_ALIGNED(16, FRAME_CONTEXT, tctx); - uint8_t allow_update_cdf; -#if CONFIG_COLLECT_INTER_MODE_RD_STATS - InterModeRdModel inter_mode_rd_models[BLOCK_SIZES_ALL]; - InterModesInfo inter_modes_info; -#endif -} TileDataEnc; - -typedef struct { - TOKENEXTRA *start; - TOKENEXTRA *stop; - unsigned int count; -} TOKENLIST; - -typedef struct RD_COUNTS { - int64_t comp_pred_diff[REFERENCE_MODES]; - // Stores number of 4x4 blocks using global motion per reference frame. - int global_motion_used[REF_FRAMES]; - int compound_ref_used_flag; - int skip_mode_used_flag; -} RD_COUNTS; - -typedef struct ThreadData { - MACROBLOCK mb; - RD_COUNTS rd_counts; - FRAME_COUNTS *counts; - PC_TREE *pc_tree; - PC_TREE *pc_root[MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2 + 1]; - uint32_t *hash_value_buffer[2][2]; - int32_t *wsrc_buf; - int32_t *mask_buf; - uint8_t *above_pred_buf; - uint8_t *left_pred_buf; - PALETTE_BUFFER *palette_buffer; - CONV_BUF_TYPE *tmp_conv_dst; - uint8_t *tmp_obmc_bufs[2]; - int intrabc_used_this_tile; -} ThreadData; - -struct EncWorkerData; - -typedef struct ActiveMap { - int enabled; - int update; - unsigned char *map; -} ActiveMap; - -#if CONFIG_INTERNAL_STATS -// types of stats -typedef enum { - STAT_Y, - STAT_U, - STAT_V, - STAT_ALL, - NUM_STAT_TYPES // This should always be the last member of the enum -} StatType; - -typedef struct IMAGE_STAT { - double stat[NUM_STAT_TYPES]; - double worst; -} ImageStat; -#endif // CONFIG_INTERNAL_STATS - -typedef struct { - int ref_count; - YV12_BUFFER_CONFIG buf; -} EncRefCntBuffer; - -typedef struct TileBufferEnc { - uint8_t *data; - size_t size; -} TileBufferEnc; - -typedef struct AV1_COMP { - QUANTS quants; - ThreadData td; - FRAME_COUNTS counts; - MB_MODE_INFO_EXT *mbmi_ext_base; - CB_COEFF_BUFFER *coeff_buffer_base; 
- Dequants dequants; - AV1_COMMON common; - AV1EncoderConfig oxcf; - struct lookahead_ctx *lookahead; - struct lookahead_entry *alt_ref_source; - int no_show_kf; - - int optimize_speed_feature; - int optimize_seg_arr[MAX_SEGMENTS]; - - YV12_BUFFER_CONFIG *source; - YV12_BUFFER_CONFIG *last_source; // NULL for first frame and alt_ref frames - YV12_BUFFER_CONFIG *unscaled_source; - YV12_BUFFER_CONFIG scaled_source; - YV12_BUFFER_CONFIG *unscaled_last_source; - YV12_BUFFER_CONFIG scaled_last_source; - - // For a still frame, this flag is set to 1 to skip partition search. - int partition_search_skippable_frame; - double csm_rate_array[32]; - double m_rate_array[32]; - int rate_size; - int rate_index; - hash_table *previous_hash_table; - int previous_index; - int cur_poc; // DebugInfo - - unsigned int row_mt; - int scaled_ref_idx[REF_FRAMES]; - int ref_fb_idx[REF_FRAMES]; - int refresh_fb_idx; // ref frame buffer index to refresh - - int last_show_frame_buf_idx; // last show frame buffer index - - int refresh_last_frame; - int refresh_golden_frame; - int refresh_bwd_ref_frame; - int refresh_alt2_ref_frame; - int refresh_alt_ref_frame; -#if USE_SYMM_MULTI_LAYER - int new_bwdref_update_rule; -#endif - - int ext_refresh_frame_flags_pending; - int ext_refresh_last_frame; - int ext_refresh_golden_frame; - int ext_refresh_bwd_ref_frame; - int ext_refresh_alt2_ref_frame; - int ext_refresh_alt_ref_frame; - - int ext_refresh_frame_context_pending; - int ext_refresh_frame_context; - int ext_use_ref_frame_mvs; - int ext_use_error_resilient; - int ext_use_s_frame; - int ext_use_primary_ref_none; - - YV12_BUFFER_CONFIG last_frame_uf; - YV12_BUFFER_CONFIG trial_frame_rst; - - // Ambient reconstruction err target for force key frames - int64_t ambient_err; - - RD_OPT rd; - - CODING_CONTEXT coding_context; - - int gmtype_cost[TRANS_TYPES]; - int gmparams_cost[REF_FRAMES]; - - int nmv_costs[2][MV_VALS]; - int nmv_costs_hp[2][MV_VALS]; - - int64_t last_time_stamp_seen; - int64_t 
last_end_time_stamp_seen; - int64_t first_time_stamp_ever; - - RATE_CONTROL rc; - double framerate; - - // NOTE(zoeliu): Any inter frame allows maximum of REF_FRAMES inter - // references; Plus the currently coded frame itself, it is needed to allocate - // sufficient space to the size of the maximum possible number of frames. - int interp_filter_selected[REF_FRAMES + 1][SWITCHABLE]; - - struct aom_codec_pkt_list *output_pkt_list; - - MBGRAPH_FRAME_STATS mbgraph_stats[MAX_LAG_BUFFERS]; - int mbgraph_n_frames; // number of frames filled in the above - int static_mb_pct; // % forced skip mbs by segmentation - int ref_frame_flags; - int ext_ref_frame_flags; - RATE_FACTOR_LEVEL frame_rf_level[FRAME_BUFFERS]; - - SPEED_FEATURES sf; - - unsigned int max_mv_magnitude; - int mv_step_param; - - int allow_comp_inter_inter; - int all_one_sided_refs; - - uint8_t *segmentation_map; - - CYCLIC_REFRESH *cyclic_refresh; - ActiveMap active_map; - - fractional_mv_step_fp *find_fractional_mv_step; - av1_diamond_search_fn_t diamond_search_sad; - aom_variance_fn_ptr_t fn_ptr[BLOCK_SIZES_ALL]; - uint64_t time_receive_data; - uint64_t time_compress_data; - uint64_t time_pick_lpf; - uint64_t time_encode_sb_row; - -#if CONFIG_FP_MB_STATS - int use_fp_mb_stats; -#endif - - TWO_PASS twopass; - - YV12_BUFFER_CONFIG alt_ref_buffer; - -#if CONFIG_INTERNAL_STATS - unsigned int mode_chosen_counts[MAX_MODES]; - - int count; - uint64_t total_sq_error; - uint64_t total_samples; - ImageStat psnr; - - double total_blockiness; - double worst_blockiness; - - int bytes; - double summed_quality; - double summed_weights; - unsigned int tot_recode_hits; - double worst_ssim; - - ImageStat fastssim; - ImageStat psnrhvs; - - int b_calculate_blockiness; - int b_calculate_consistency; - - double total_inconsistency; - double worst_consistency; - Ssimv *ssim_vars; - Metrics metrics; -#endif - int b_calculate_psnr; - - int droppable; - - int initial_width; - int initial_height; - int initial_mbs; // Number of MBs 
in the full-size frame; to be used to - // normalize the firstpass stats. This will differ from the - // number of MBs in the current frame when the frame is - // scaled. - - // When resize is triggered through external control, the desired width/height - // are stored here until use in the next frame coded. They are effective only - // for - // one frame and are reset after use. - int resize_pending_width; - int resize_pending_height; - - int frame_flags; - - search_site_config ss_cfg; - - TileDataEnc *tile_data; - int allocated_tiles; // Keep track of memory allocated for tiles. - - TOKENEXTRA *tile_tok[MAX_TILE_ROWS][MAX_TILE_COLS]; - unsigned int tok_count[MAX_TILE_ROWS][MAX_TILE_COLS]; - TOKENLIST *tplist[MAX_TILE_ROWS][MAX_TILE_COLS]; - - TileBufferEnc tile_buffers[MAX_TILE_ROWS][MAX_TILE_COLS]; - - int resize_state; - int resize_avg_qp; - int resize_buffer_underflow; - int resize_count; - - // Sequence parameters have been transmitted already and locked - // or not. Once locked av1_change_config cannot change the seq - // parameters. - int seq_params_locked; - - // VARIANCE_AQ segment map refresh - int vaq_refresh; - - // Multi-threading - int num_workers; - AVxWorker *workers; - struct EncWorkerData *tile_thr_data; - int refresh_frame_mask; - int existing_fb_idx_to_show; - int is_arf_filter_off[MAX_EXT_ARFS + 1]; - int num_extra_arfs; - int arf_pos_in_gf[MAX_EXT_ARFS + 1]; - int arf_pos_for_ovrly[MAX_EXT_ARFS + 1]; - int global_motion_search_done; - tran_low_t *tcoeff_buf[MAX_MB_PLANE]; - int extra_arf_allowed; - // A flag to indicate if intrabc is ever used in current frame. - int intrabc_used; - int dv_cost[2][MV_VALS]; - // TODO(huisu@google.com): we can update dv_joint_cost per SB. - int dv_joint_cost[MV_JOINTS]; - int has_lossless_segment; - - // For frame refs short signaling: - // A mapping of each reference frame from its encoder side value to the - // decoder side value obtained following the short signaling procedure. 
- int ref_conv[REF_FRAMES]; - - AV1LfSync lf_row_sync; - AV1LrSync lr_row_sync; - AV1LrStruct lr_ctxt; - - aom_film_grain_table_t *film_grain_table; -#if CONFIG_DENOISE - struct aom_denoise_and_model_t *denoise_and_model; -#endif - // Stores the default value of skip flag depending on chroma format - // Set as 1 for monochrome and 3 for other color formats - int default_interp_skip_flags; - int preserve_arf_as_gld; -} AV1_COMP; - -// Must not be called more than once. -void av1_initialize_enc(void); - -struct AV1_COMP *av1_create_compressor(AV1EncoderConfig *oxcf, - BufferPool *const pool); -void av1_remove_compressor(AV1_COMP *cpi); - -void av1_change_config(AV1_COMP *cpi, const AV1EncoderConfig *oxcf); - -// receive a frames worth of data. caller can assume that a copy of this -// frame is made and not just a copy of the pointer.. -int av1_receive_raw_frame(AV1_COMP *cpi, aom_enc_frame_flags_t frame_flags, - YV12_BUFFER_CONFIG *sd, int64_t time_stamp, - int64_t end_time_stamp); - -int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags, - size_t *size, uint8_t *dest, int64_t *time_stamp, - int64_t *time_end, int flush, - const aom_rational_t *timebase); - -int av1_get_preview_raw_frame(AV1_COMP *cpi, YV12_BUFFER_CONFIG *dest); - -int av1_get_last_show_frame(AV1_COMP *cpi, YV12_BUFFER_CONFIG *frame); - -aom_codec_err_t av1_copy_new_frame_enc(AV1_COMMON *cm, - YV12_BUFFER_CONFIG *new_frame, - YV12_BUFFER_CONFIG *sd); - -int av1_use_as_reference(AV1_COMP *cpi, int ref_frame_flags); - -void av1_update_reference(AV1_COMP *cpi, int ref_frame_flags); - -int av1_copy_reference_enc(AV1_COMP *cpi, int idx, YV12_BUFFER_CONFIG *sd); - -int av1_set_reference_enc(AV1_COMP *cpi, int idx, YV12_BUFFER_CONFIG *sd); - -int av1_update_entropy(AV1_COMP *cpi, int update); - -int av1_set_active_map(AV1_COMP *cpi, unsigned char *map, int rows, int cols); - -int av1_get_active_map(AV1_COMP *cpi, unsigned char *map, int rows, int cols); - -int av1_set_internal_size(AV1_COMP 
*cpi, AOM_SCALING horiz_mode, - AOM_SCALING vert_mode); - -int av1_get_quantizer(struct AV1_COMP *cpi); - -int av1_convert_sect5obus_to_annexb(uint8_t *buffer, size_t *input_size); - -int64_t timebase_units_to_ticks(const aom_rational_t *timebase, int64_t n); -int64_t ticks_to_timebase_units(const aom_rational_t *timebase, int64_t n); - -static INLINE int frame_is_kf_gf_arf(const AV1_COMP *cpi) { - return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame || - (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref); -} - -static INLINE int get_ref_frame_map_idx(const AV1_COMP *cpi, - MV_REFERENCE_FRAME ref_frame) { - return (ref_frame >= 1) ? cpi->ref_fb_idx[ref_frame - 1] : INVALID_IDX; -} - -static INLINE int get_ref_frame_buf_idx(const AV1_COMP *cpi, - MV_REFERENCE_FRAME ref_frame) { - const AV1_COMMON *const cm = &cpi->common; - const int map_idx = get_ref_frame_map_idx(cpi, ref_frame); - return (map_idx != INVALID_IDX) ? cm->ref_frame_map[map_idx] : INVALID_IDX; -} - -// TODO(huisu@google.com, youzhou@microsoft.com): enable hash-me for HBD. -static INLINE int av1_use_hash_me(const AV1_COMMON *const cm) { - return cm->allow_screen_content_tools; -} - -static INLINE hash_table *av1_get_ref_frame_hash_map( - const AV1_COMP *cpi, MV_REFERENCE_FRAME ref_frame) { - const AV1_COMMON *const cm = &cpi->common; - const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame); - return buf_idx != INVALID_IDX - ? &cm->buffer_pool->frame_bufs[buf_idx].hash_table - : NULL; -} - -static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer( - const AV1_COMP *cpi, MV_REFERENCE_FRAME ref_frame) { - const AV1_COMMON *const cm = &cpi->common; - const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame); - return buf_idx != INVALID_IDX ? 
&cm->buffer_pool->frame_bufs[buf_idx].buf - : NULL; -} - -static INLINE int enc_is_ref_frame_buf(AV1_COMP *cpi, RefCntBuffer *frame_buf) { - MV_REFERENCE_FRAME ref_frame; - AV1_COMMON *const cm = &cpi->common; - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { - const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame); - if (buf_idx == INVALID_IDX) continue; - if (frame_buf == &cm->buffer_pool->frame_bufs[buf_idx]) break; - } - return (ref_frame <= ALTREF_FRAME); -} - -// Token buffer is only used for palette tokens. -static INLINE unsigned int get_token_alloc(int mb_rows, int mb_cols, - int sb_size_log2, - const int num_planes) { - // Calculate the maximum number of max superblocks in the image. - const int shift = sb_size_log2 - 4; - const int sb_size = 1 << sb_size_log2; - const int sb_size_square = sb_size * sb_size; - const int sb_rows = ALIGN_POWER_OF_TWO(mb_rows, shift) >> shift; - const int sb_cols = ALIGN_POWER_OF_TWO(mb_cols, shift) >> shift; - - // One palette token for each pixel. There can be palettes on two planes. - const int sb_palette_toks = AOMMIN(2, num_planes) * sb_size_square; - - return sb_rows * sb_cols * sb_palette_toks; -} - -// Get the allocated token size for a tile. It does the same calculation as in -// the frame token allocation. 
-static INLINE unsigned int allocated_tokens(TileInfo tile, int sb_size_log2, - int num_planes) { - int tile_mb_rows = (tile.mi_row_end - tile.mi_row_start + 2) >> 2; - int tile_mb_cols = (tile.mi_col_end - tile.mi_col_start + 2) >> 2; - - return get_token_alloc(tile_mb_rows, tile_mb_cols, sb_size_log2, num_planes); -} - -static INLINE void get_start_tok(AV1_COMP *cpi, int tile_row, int tile_col, - int mi_row, TOKENEXTRA **tok, int sb_size_log2, - int num_planes) { - AV1_COMMON *const cm = &cpi->common; - const int tile_cols = cm->tile_cols; - TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; - const TileInfo *const tile_info = &this_tile->tile_info; - - const int tile_mb_cols = - (tile_info->mi_col_end - tile_info->mi_col_start + 2) >> 2; - const int tile_mb_row = (mi_row - tile_info->mi_row_start + 2) >> 2; - - *tok = cpi->tile_tok[tile_row][tile_col] + - get_token_alloc(tile_mb_row, tile_mb_cols, sb_size_log2, num_planes); -} - -void av1_apply_encoding_flags(AV1_COMP *cpi, aom_enc_frame_flags_t flags); - -#define ALT_MIN_LAG 3 -static INLINE int is_altref_enabled(const AV1_COMP *const cpi) { - return cpi->oxcf.lag_in_frames >= ALT_MIN_LAG && cpi->oxcf.enable_auto_arf; -} - -// TODO(zoeliu): To set up cpi->oxcf.enable_auto_brf - -static INLINE void set_ref_ptrs(const AV1_COMMON *cm, MACROBLOCKD *xd, - MV_REFERENCE_FRAME ref0, - MV_REFERENCE_FRAME ref1) { - xd->block_refs[0] = - &cm->frame_refs[ref0 >= LAST_FRAME ? ref0 - LAST_FRAME : 0]; - xd->block_refs[1] = - &cm->frame_refs[ref1 >= LAST_FRAME ? ref1 - LAST_FRAME : 0]; -} - -static INLINE int get_chessboard_index(int frame_index) { - return frame_index & 0x1; -} - -static INLINE int *cond_cost_list(const struct AV1_COMP *cpi, int *cost_list) { - return cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? 
cost_list : NULL; -} - -void av1_new_framerate(AV1_COMP *cpi, double framerate); - -#define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl)) - -// Update up-sampled reference frame index. -static INLINE void uref_cnt_fb(EncRefCntBuffer *ubufs, int *uidx, - int new_uidx) { - const int ref_index = *uidx; - - if (ref_index >= 0 && ubufs[ref_index].ref_count > 0) - ubufs[ref_index].ref_count--; - - *uidx = new_uidx; - ubufs[new_uidx].ref_count++; -} - -// Returns 1 if a frame is scaled and 0 otherwise. -static INLINE int av1_resize_scaled(const AV1_COMMON *cm) { - return !(cm->superres_upscaled_width == cm->render_width && - cm->superres_upscaled_height == cm->render_height); -} - -static INLINE int av1_frame_scaled(const AV1_COMMON *cm) { - return !av1_superres_scaled(cm) && av1_resize_scaled(cm); -} - -// Don't allow a show_existing_frame to coincide with an error resilient -// frame. An exception can be made for a forward keyframe since it has no -// previous dependencies. -static INLINE int encode_show_existing_frame(const AV1_COMMON *cm) { - return cm->show_existing_frame && - (!cm->error_resilient_mode || cm->frame_type == KEY_FRAME); -} - -// Returns a Sequence Header OBU stored in an aom_fixed_buf_t, or NULL upon -// failure. When a non-NULL aom_fixed_buf_t pointer is returned by this -// function, the memory must be freed by the caller. Both the buf member of the -// aom_fixed_buf_t, and the aom_fixed_buf_t pointer itself must be freed. Memory -// returned must be freed via call to free(). -// -// Note: The OBU returned is in Low Overhead Bitstream Format. Specifically, -// the obu_has_size_field bit is set, and the buffer contains the obu_size -// field. 
-aom_fixed_buf_t *av1_get_global_headers(AV1_COMP *cpi); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_ENCODER_H_ diff --git a/third_party/aom/av1/encoder/encodetxb.c b/third_party/aom/av1/encoder/encodetxb.c deleted file mode 100644 index 5a31d93d7..000000000 --- a/third_party/aom/av1/encoder/encodetxb.c +++ /dev/null @@ -1,2062 +0,0 @@ -/* - * Copyright (c) 2017, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include "av1/encoder/encodetxb.h" - -#include "aom_ports/mem.h" -#include "av1/common/blockd.h" -#include "av1/common/idct.h" -#include "av1/common/pred_common.h" -#include "av1/common/scan.h" -#include "av1/encoder/bitstream.h" -#include "av1/encoder/cost.h" -#include "av1/encoder/encodeframe.h" -#include "av1/encoder/hash.h" -#include "av1/encoder/rdopt.h" -#include "av1/encoder/tokenize.h" - -static int hbt_needs_init = 1; -static CRC32C crc_calculator; -static const int HBT_EOB = 16; // also the length in opt_qcoeff -static const int HBT_TABLE_SIZE = 65536; // 16 bit: holds 65536 'arrays' -static const int HBT_ARRAY_LENGTH = 256; // 8 bit: 256 entries -// If removed in hbt_create_hashes or increased beyond int8_t, widen deltas type -static const int HBT_KICKOUT = 3; - -typedef struct OptTxbQcoeff { - // Use larger type if larger/no kickout value is used in hbt_create_hashes - int8_t deltas[16]; - uint32_t hbt_qc_hash; - uint32_t hbt_ctx_hash; - int init; - int rate_cost; -} OptTxbQcoeff; - -OptTxbQcoeff *hbt_hash_table; - -typedef struct LevelDownStats { - int 
update; - tran_low_t low_qc; - tran_low_t low_dqc; - int64_t dist0; - int rate; - int rate_low; - int64_t dist; - int64_t dist_low; - int64_t rd; - int64_t rd_low; - int64_t nz_rd; - int64_t rd_diff; - int cost_diff; - int64_t dist_diff; - int new_eob; -} LevelDownStats; - -void av1_alloc_txb_buf(AV1_COMP *cpi) { - AV1_COMMON *cm = &cpi->common; - int size = ((cm->mi_rows >> cm->seq_params.mib_size_log2) + 1) * - ((cm->mi_cols >> cm->seq_params.mib_size_log2) + 1); - - av1_free_txb_buf(cpi); - // TODO(jingning): This should be further reduced. - CHECK_MEM_ERROR(cm, cpi->coeff_buffer_base, - aom_memalign(32, sizeof(*cpi->coeff_buffer_base) * size)); -} - -void av1_free_txb_buf(AV1_COMP *cpi) { aom_free(cpi->coeff_buffer_base); } - -void av1_set_coeff_buffer(const AV1_COMP *const cpi, MACROBLOCK *const x, - int mi_row, int mi_col) { - const AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - int mib_size_log2 = cm->seq_params.mib_size_log2; - int stride = (cm->mi_cols >> mib_size_log2) + 1; - int offset = (mi_row >> mib_size_log2) * stride + (mi_col >> mib_size_log2); - CB_COEFF_BUFFER *coeff_buf = &cpi->coeff_buffer_base[offset]; - const int txb_offset = x->cb_offset / (TX_SIZE_W_MIN * TX_SIZE_H_MIN); - assert(x->cb_offset < (1 << num_pels_log2_lookup[cm->seq_params.sb_size])); - for (int plane = 0; plane < num_planes; ++plane) { - x->mbmi_ext->tcoeff[plane] = coeff_buf->tcoeff[plane] + x->cb_offset; - x->mbmi_ext->eobs[plane] = coeff_buf->eobs[plane] + txb_offset; - x->mbmi_ext->txb_skip_ctx[plane] = - coeff_buf->txb_skip_ctx[plane] + txb_offset; - x->mbmi_ext->dc_sign_ctx[plane] = - coeff_buf->dc_sign_ctx[plane] + txb_offset; - } -} - -static void write_golomb(aom_writer *w, int level) { - int x = level + 1; - int i = x; - int length = 0; - - while (i) { - i >>= 1; - ++length; - } - assert(length > 0); - - for (i = 0; i < length - 1; ++i) aom_write_bit(w, 0); - - for (i = length - 1; i >= 0; --i) aom_write_bit(w, (x >> i) & 0x01); 
-} - -static INLINE tran_low_t get_lower_coeff(tran_low_t qc) { - if (qc == 0) { - return 0; - } - return qc > 0 ? qc - 1 : qc + 1; -} - -static INLINE tran_low_t qcoeff_to_dqcoeff(tran_low_t qc, int coeff_idx, - int dqv, int shift, - const qm_val_t *iqmatrix) { - int sign = qc < 0 ? -1 : 1; - if (iqmatrix != NULL) - dqv = - ((iqmatrix[coeff_idx] * dqv) + (1 << (AOM_QM_BITS - 1))) >> AOM_QM_BITS; - return sign * ((abs(qc) * dqv) >> shift); -} - -static INLINE int64_t get_coeff_dist(tran_low_t tcoeff, tran_low_t dqcoeff, - int shift) { - const int64_t diff = (tcoeff - dqcoeff) * (1 << shift); - const int64_t error = diff * diff; - return error; -} - -static const int8_t eob_to_pos_small[33] = { - 0, 1, 2, // 0-2 - 3, 3, // 3-4 - 4, 4, 4, 4, // 5-8 - 5, 5, 5, 5, 5, 5, 5, 5, // 9-16 - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 // 17-32 -}; - -static const int8_t eob_to_pos_large[17] = { - 6, // place holder - 7, // 33-64 - 8, 8, // 65-128 - 9, 9, 9, 9, // 129-256 - 10, 10, 10, 10, 10, 10, 10, 10, // 257-512 - 11 // 513- -}; - -static INLINE int get_eob_pos_token(const int eob, int *const extra) { - int t; - - if (eob < 33) { - t = eob_to_pos_small[eob]; - } else { - const int e = AOMMIN((eob - 1) >> 5, 16); - t = eob_to_pos_large[e]; - } - - *extra = eob - k_eob_group_start[t]; - - return t; -} - -#if CONFIG_ENTROPY_STATS -void av1_update_eob_context(int cdf_idx, int eob, TX_SIZE tx_size, - TX_CLASS tx_class, PLANE_TYPE plane, - FRAME_CONTEXT *ec_ctx, FRAME_COUNTS *counts, - uint8_t allow_update_cdf) { -#else -void av1_update_eob_context(int eob, TX_SIZE tx_size, TX_CLASS tx_class, - PLANE_TYPE plane, FRAME_CONTEXT *ec_ctx, - uint8_t allow_update_cdf) { -#endif - int eob_extra; - const int eob_pt = get_eob_pos_token(eob, &eob_extra); - TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size); - - const int eob_multi_size = txsize_log2_minus4[tx_size]; - const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 
0 : 1; - - switch (eob_multi_size) { - case 0: -#if CONFIG_ENTROPY_STATS - ++counts->eob_multi16[cdf_idx][plane][eob_multi_ctx][eob_pt - 1]; -#endif - if (allow_update_cdf) - update_cdf(ec_ctx->eob_flag_cdf16[plane][eob_multi_ctx], eob_pt - 1, 5); - break; - case 1: -#if CONFIG_ENTROPY_STATS - ++counts->eob_multi32[cdf_idx][plane][eob_multi_ctx][eob_pt - 1]; -#endif - if (allow_update_cdf) - update_cdf(ec_ctx->eob_flag_cdf32[plane][eob_multi_ctx], eob_pt - 1, 6); - break; - case 2: -#if CONFIG_ENTROPY_STATS - ++counts->eob_multi64[cdf_idx][plane][eob_multi_ctx][eob_pt - 1]; -#endif - if (allow_update_cdf) - update_cdf(ec_ctx->eob_flag_cdf64[plane][eob_multi_ctx], eob_pt - 1, 7); - break; - case 3: -#if CONFIG_ENTROPY_STATS - ++counts->eob_multi128[cdf_idx][plane][eob_multi_ctx][eob_pt - 1]; -#endif - if (allow_update_cdf) { - update_cdf(ec_ctx->eob_flag_cdf128[plane][eob_multi_ctx], eob_pt - 1, - 8); - } - break; - case 4: -#if CONFIG_ENTROPY_STATS - ++counts->eob_multi256[cdf_idx][plane][eob_multi_ctx][eob_pt - 1]; -#endif - if (allow_update_cdf) { - update_cdf(ec_ctx->eob_flag_cdf256[plane][eob_multi_ctx], eob_pt - 1, - 9); - } - break; - case 5: -#if CONFIG_ENTROPY_STATS - ++counts->eob_multi512[cdf_idx][plane][eob_multi_ctx][eob_pt - 1]; -#endif - if (allow_update_cdf) { - update_cdf(ec_ctx->eob_flag_cdf512[plane][eob_multi_ctx], eob_pt - 1, - 10); - } - break; - case 6: - default: -#if CONFIG_ENTROPY_STATS - ++counts->eob_multi1024[cdf_idx][plane][eob_multi_ctx][eob_pt - 1]; -#endif - if (allow_update_cdf) { - update_cdf(ec_ctx->eob_flag_cdf1024[plane][eob_multi_ctx], eob_pt - 1, - 11); - } - break; - } - - if (k_eob_offset_bits[eob_pt] > 0) { - int eob_ctx = eob_pt - 3; - int eob_shift = k_eob_offset_bits[eob_pt] - 1; - int bit = (eob_extra & (1 << eob_shift)) ? 
1 : 0; -#if CONFIG_ENTROPY_STATS - counts->eob_extra[cdf_idx][txs_ctx][plane][eob_pt][bit]++; -#endif // CONFIG_ENTROPY_STATS - if (allow_update_cdf) - update_cdf(ec_ctx->eob_extra_cdf[txs_ctx][plane][eob_ctx], bit, 2); - } -} - -static int get_eob_cost(int eob, const LV_MAP_EOB_COST *txb_eob_costs, - const LV_MAP_COEFF_COST *txb_costs, TX_CLASS tx_class) { - int eob_extra; - const int eob_pt = get_eob_pos_token(eob, &eob_extra); - int eob_cost = 0; - const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 0 : 1; - eob_cost = txb_eob_costs->eob_cost[eob_multi_ctx][eob_pt - 1]; - - if (k_eob_offset_bits[eob_pt] > 0) { - const int eob_ctx = eob_pt - 3; - const int eob_shift = k_eob_offset_bits[eob_pt] - 1; - const int bit = (eob_extra & (1 << eob_shift)) ? 1 : 0; - eob_cost += txb_costs->eob_extra_cost[eob_ctx][bit]; - const int offset_bits = k_eob_offset_bits[eob_pt]; - if (offset_bits > 1) eob_cost += av1_cost_literal(offset_bits - 1); - } - return eob_cost; -} - -static INLINE int get_sign_bit_cost(tran_low_t qc, int coeff_idx, - const int (*dc_sign_cost)[2], - int dc_sign_ctx) { - if (coeff_idx == 0) { - const int sign = (qc < 0) ? 
1 : 0; - return dc_sign_cost[dc_sign_ctx][sign]; - } - return av1_cost_literal(1); -} - -static INLINE int get_br_cost(tran_low_t abs_qc, int ctx, - const int *coeff_lps) { - const tran_low_t min_level = 1 + NUM_BASE_LEVELS; - const tran_low_t max_level = 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE; - (void)ctx; - if (abs_qc >= min_level) { - if (abs_qc >= max_level) { - return coeff_lps[COEFF_BASE_RANGE]; // COEFF_BASE_RANGE * cost0; - } else { - return coeff_lps[(abs_qc - min_level)]; // * cost0 + cost1; - } - } - return 0; -} - -static INLINE int get_golomb_cost(int abs_qc) { - if (abs_qc >= 1 + NUM_BASE_LEVELS + COEFF_BASE_RANGE) { - const int r = abs_qc - COEFF_BASE_RANGE - NUM_BASE_LEVELS; - const int length = get_msb(r) + 1; - return av1_cost_literal(2 * length - 1); - } - return 0; -} - -static int get_coeff_cost(const tran_low_t qc, const int scan_idx, - const int is_eob, const TxbInfo *const txb_info, - const LV_MAP_COEFF_COST *const txb_costs, - const int coeff_ctx, const TX_CLASS tx_class) { - const TXB_CTX *const txb_ctx = txb_info->txb_ctx; - const int is_nz = (qc != 0); - const tran_low_t abs_qc = abs(qc); - int cost = 0; - const int16_t *const scan = txb_info->scan_order->scan; - const int pos = scan[scan_idx]; - - if (is_eob) { - cost += txb_costs->base_eob_cost[coeff_ctx][AOMMIN(abs_qc, 3) - 1]; - } else { - cost += txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)]; - } - if (is_nz) { - cost += get_sign_bit_cost(qc, scan_idx, txb_costs->dc_sign_cost, - txb_ctx->dc_sign_ctx); - - if (abs_qc > NUM_BASE_LEVELS) { - const int ctx = - get_br_ctx(txb_info->levels, pos, txb_info->bwl, tx_class); - cost += get_br_cost(abs_qc, ctx, txb_costs->lps_cost[ctx]); - cost += get_golomb_cost(abs_qc); - } - } - return cost; -} - -static INLINE int get_nz_map_ctx(const uint8_t *const levels, - const int coeff_idx, const int bwl, - const int height, const int scan_idx, - const int is_eob, const TX_SIZE tx_size, - const TX_CLASS tx_class) { - if (is_eob) { - if (scan_idx 
== 0) return 0; - if (scan_idx <= (height << bwl) / 8) return 1; - if (scan_idx <= (height << bwl) / 4) return 2; - return 3; - } - const int stats = - get_nz_mag(levels + get_padded_idx(coeff_idx, bwl), bwl, tx_class); - return get_nz_map_ctx_from_stats(stats, coeff_idx, bwl, tx_size, tx_class); -} - -static void get_dist_cost_stats(LevelDownStats *const stats, const int scan_idx, - const int is_eob, - const LV_MAP_COEFF_COST *const txb_costs, - const TxbInfo *const txb_info, - const TX_CLASS tx_class) { - const int16_t *const scan = txb_info->scan_order->scan; - const int coeff_idx = scan[scan_idx]; - const tran_low_t qc = txb_info->qcoeff[coeff_idx]; - const uint8_t *const levels = txb_info->levels; - stats->new_eob = -1; - stats->update = 0; - stats->rd_low = 0; - stats->rd = 0; - stats->nz_rd = 0; - stats->dist_low = 0; - stats->rate_low = 0; - stats->low_qc = 0; - - const tran_low_t tqc = txb_info->tcoeff[coeff_idx]; - const int dqv = txb_info->dequant[coeff_idx != 0]; - const int coeff_ctx = - get_nz_map_ctx(levels, coeff_idx, txb_info->bwl, txb_info->height, - scan_idx, is_eob, txb_info->tx_size, tx_class); - const int qc_cost = get_coeff_cost(qc, scan_idx, is_eob, txb_info, txb_costs, - coeff_ctx, tx_class); - assert(qc != 0); - const tran_low_t dqc = qcoeff_to_dqcoeff(qc, coeff_idx, dqv, txb_info->shift, - txb_info->iqmatrix); - const int64_t dqc_dist = get_coeff_dist(tqc, dqc, txb_info->shift); - - // distortion difference when coefficient is quantized to 0 - const tran_low_t dqc0 = - qcoeff_to_dqcoeff(0, coeff_idx, dqv, txb_info->shift, txb_info->iqmatrix); - - stats->dist0 = get_coeff_dist(tqc, dqc0, txb_info->shift); - stats->dist = dqc_dist - stats->dist0; - stats->rate = qc_cost; - - stats->rd = RDCOST(txb_info->rdmult, stats->rate, stats->dist); - - stats->low_qc = get_lower_coeff(qc); - - if (is_eob && stats->low_qc == 0) { - stats->rd_low = stats->rd; // disable selection of low_qc in this case. 
- } else { - if (stats->low_qc == 0) { - stats->dist_low = 0; - } else { - stats->low_dqc = qcoeff_to_dqcoeff(stats->low_qc, coeff_idx, dqv, - txb_info->shift, txb_info->iqmatrix); - const int64_t low_dqc_dist = - get_coeff_dist(tqc, stats->low_dqc, txb_info->shift); - stats->dist_low = low_dqc_dist - stats->dist0; - } - const int low_qc_cost = - get_coeff_cost(stats->low_qc, scan_idx, is_eob, txb_info, txb_costs, - coeff_ctx, tx_class); - stats->rate_low = low_qc_cost; - stats->rd_low = RDCOST(txb_info->rdmult, stats->rate_low, stats->dist_low); - } -} - -static void get_dist_cost_stats_with_eob( - LevelDownStats *const stats, const int scan_idx, - const LV_MAP_COEFF_COST *const txb_costs, const TxbInfo *const txb_info, - const TX_CLASS tx_class) { - const int is_eob = 0; - get_dist_cost_stats(stats, scan_idx, is_eob, txb_costs, txb_info, tx_class); - - const int16_t *const scan = txb_info->scan_order->scan; - const int coeff_idx = scan[scan_idx]; - const tran_low_t qc = txb_info->qcoeff[coeff_idx]; - const int coeff_ctx_temp = get_nz_map_ctx( - txb_info->levels, coeff_idx, txb_info->bwl, txb_info->height, scan_idx, 1, - txb_info->tx_size, tx_class); - const int qc_eob_cost = get_coeff_cost(qc, scan_idx, 1, txb_info, txb_costs, - coeff_ctx_temp, tx_class); - int64_t rd_eob = RDCOST(txb_info->rdmult, qc_eob_cost, stats->dist); - if (stats->low_qc != 0) { - const int low_qc_eob_cost = - get_coeff_cost(stats->low_qc, scan_idx, 1, txb_info, txb_costs, - coeff_ctx_temp, tx_class); - int64_t rd_eob_low = - RDCOST(txb_info->rdmult, low_qc_eob_cost, stats->dist_low); - rd_eob = (rd_eob > rd_eob_low) ? 
rd_eob_low : rd_eob; - } - - stats->nz_rd = AOMMIN(stats->rd_low, stats->rd) - rd_eob; -} - -static INLINE void update_qcoeff(const int coeff_idx, const tran_low_t qc, - const TxbInfo *const txb_info) { - txb_info->qcoeff[coeff_idx] = qc; - txb_info->levels[get_padded_idx(coeff_idx, txb_info->bwl)] = - (uint8_t)clamp(abs(qc), 0, INT8_MAX); -} - -static INLINE void update_coeff(const int coeff_idx, const tran_low_t qc, - const TxbInfo *const txb_info) { - update_qcoeff(coeff_idx, qc, txb_info); - const int dqv = txb_info->dequant[coeff_idx != 0]; - txb_info->dqcoeff[coeff_idx] = qcoeff_to_dqcoeff( - qc, coeff_idx, dqv, txb_info->shift, txb_info->iqmatrix); -} - -void av1_txb_init_levels_c(const tran_low_t *const coeff, const int width, - const int height, uint8_t *const levels) { - const int stride = width + TX_PAD_HOR; - uint8_t *ls = levels; - - memset(levels - TX_PAD_TOP * stride, 0, - sizeof(*levels) * TX_PAD_TOP * stride); - memset(levels + stride * height, 0, - sizeof(*levels) * (TX_PAD_BOTTOM * stride + TX_PAD_END)); - - for (int i = 0; i < height; i++) { - for (int j = 0; j < width; j++) { - *ls++ = (uint8_t)clamp(abs(coeff[i * width + j]), 0, INT8_MAX); - } - for (int j = 0; j < TX_PAD_HOR; j++) { - *ls++ = 0; - } - } -} - -void av1_get_nz_map_contexts_c(const uint8_t *const levels, - const int16_t *const scan, const uint16_t eob, - const TX_SIZE tx_size, const TX_CLASS tx_class, - int8_t *const coeff_contexts) { - const int bwl = get_txb_bwl(tx_size); - const int height = get_txb_high(tx_size); - for (int i = 0; i < eob; ++i) { - const int pos = scan[i]; - coeff_contexts[pos] = get_nz_map_ctx(levels, pos, bwl, height, i, - i == eob - 1, tx_size, tx_class); - } -} - -void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd, - aom_writer *w, int blk_row, int blk_col, int plane, - TX_SIZE tx_size, const tran_low_t *tcoeff, - uint16_t eob, TXB_CTX *txb_ctx) { - const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size); - FRAME_CONTEXT *ec_ctx = 
xd->tile_ctx; - aom_write_symbol(w, eob == 0, - ec_ctx->txb_skip_cdf[txs_ctx][txb_ctx->txb_skip_ctx], 2); - if (eob == 0) return; - const PLANE_TYPE plane_type = get_plane_type(plane); - const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col, - tx_size, cm->reduced_tx_set_used); - const TX_CLASS tx_class = tx_type_to_class[tx_type]; - const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type); - const int16_t *const scan = scan_order->scan; - int c; - const int bwl = get_txb_bwl(tx_size); - const int width = get_txb_wide(tx_size); - const int height = get_txb_high(tx_size); - - uint8_t levels_buf[TX_PAD_2D]; - uint8_t *const levels = set_levels(levels_buf, width); - DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]); - av1_txb_init_levels(tcoeff, width, height, levels); - - av1_write_tx_type(cm, xd, blk_row, blk_col, plane, tx_size, w); - - int eob_extra; - const int eob_pt = get_eob_pos_token(eob, &eob_extra); - const int eob_multi_size = txsize_log2_minus4[tx_size]; - const int eob_multi_ctx = (tx_class == TX_CLASS_2D) ? 
0 : 1; - switch (eob_multi_size) { - case 0: - aom_write_symbol(w, eob_pt - 1, - ec_ctx->eob_flag_cdf16[plane_type][eob_multi_ctx], 5); - break; - case 1: - aom_write_symbol(w, eob_pt - 1, - ec_ctx->eob_flag_cdf32[plane_type][eob_multi_ctx], 6); - break; - case 2: - aom_write_symbol(w, eob_pt - 1, - ec_ctx->eob_flag_cdf64[plane_type][eob_multi_ctx], 7); - break; - case 3: - aom_write_symbol(w, eob_pt - 1, - ec_ctx->eob_flag_cdf128[plane_type][eob_multi_ctx], 8); - break; - case 4: - aom_write_symbol(w, eob_pt - 1, - ec_ctx->eob_flag_cdf256[plane_type][eob_multi_ctx], 9); - break; - case 5: - aom_write_symbol(w, eob_pt - 1, - ec_ctx->eob_flag_cdf512[plane_type][eob_multi_ctx], 10); - break; - default: - aom_write_symbol(w, eob_pt - 1, - ec_ctx->eob_flag_cdf1024[plane_type][eob_multi_ctx], 11); - break; - } - - if (k_eob_offset_bits[eob_pt] > 0) { - const int eob_ctx = eob_pt - 3; - int eob_shift = k_eob_offset_bits[eob_pt] - 1; - int bit = (eob_extra & (1 << eob_shift)) ? 1 : 0; - aom_write_symbol(w, bit, - ec_ctx->eob_extra_cdf[txs_ctx][plane_type][eob_ctx], 2); - for (int i = 1; i < k_eob_offset_bits[eob_pt]; i++) { - eob_shift = k_eob_offset_bits[eob_pt] - 1 - i; - bit = (eob_extra & (1 << eob_shift)) ? 1 : 0; - aom_write_bit(w, bit); - } - } - - av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, coeff_contexts); - - for (c = eob - 1; c >= 0; --c) { - const int pos = scan[c]; - const int coeff_ctx = coeff_contexts[pos]; - const tran_low_t v = tcoeff[pos]; - const tran_low_t level = abs(v); - - if (c == eob - 1) { - aom_write_symbol( - w, AOMMIN(level, 3) - 1, - ec_ctx->coeff_base_eob_cdf[txs_ctx][plane_type][coeff_ctx], 3); - } else { - aom_write_symbol(w, AOMMIN(level, 3), - ec_ctx->coeff_base_cdf[txs_ctx][plane_type][coeff_ctx], - 4); - } - if (level > NUM_BASE_LEVELS) { - // level is above 1. 
- const int base_range = level - 1 - NUM_BASE_LEVELS; - const int br_ctx = get_br_ctx(levels, pos, bwl, tx_class); - for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) { - const int k = AOMMIN(base_range - idx, BR_CDF_SIZE - 1); - aom_write_symbol( - w, k, - ec_ctx->coeff_br_cdf[AOMMIN(txs_ctx, TX_32X32)][plane_type][br_ctx], - BR_CDF_SIZE); - if (k < BR_CDF_SIZE - 1) break; - } - } - } - - // Loop to code all signs in the transform block, - // starting with the sign of DC (if applicable) - for (c = 0; c < eob; ++c) { - const tran_low_t v = tcoeff[scan[c]]; - const tran_low_t level = abs(v); - const int sign = (v < 0) ? 1 : 0; - if (level) { - if (c == 0) { - aom_write_symbol( - w, sign, ec_ctx->dc_sign_cdf[plane_type][txb_ctx->dc_sign_ctx], 2); - } else { - aom_write_bit(w, sign); - } - if (level > COEFF_BASE_RANGE + NUM_BASE_LEVELS) - write_golomb(w, level - COEFF_BASE_RANGE - 1 - NUM_BASE_LEVELS); - } - } -} - -typedef struct encode_txb_args { - const AV1_COMMON *cm; - MACROBLOCK *x; - aom_writer *w; -} ENCODE_TXB_ARGS; - -static void write_coeffs_txb_wrap(const AV1_COMMON *cm, MACROBLOCK *x, - aom_writer *w, int plane, int block, - int blk_row, int blk_col, TX_SIZE tx_size) { - MACROBLOCKD *xd = &x->e_mbd; - tran_low_t *tcoeff = BLOCK_OFFSET(x->mbmi_ext->tcoeff[plane], block); - uint16_t eob = x->mbmi_ext->eobs[plane][block]; - TXB_CTX txb_ctx = { x->mbmi_ext->txb_skip_ctx[plane][block], - x->mbmi_ext->dc_sign_ctx[plane][block] }; - av1_write_coeffs_txb(cm, xd, w, blk_row, blk_col, plane, tx_size, tcoeff, eob, - &txb_ctx); -} - -void av1_write_coeffs_mb(const AV1_COMMON *const cm, MACROBLOCK *x, int mi_row, - int mi_col, aom_writer *w, BLOCK_SIZE bsize) { - MACROBLOCKD *xd = &x->e_mbd; - const int num_planes = av1_num_planes(cm); - int block[MAX_MB_PLANE] = { 0 }; - int row, col; - assert(bsize == get_plane_block_size(bsize, xd->plane[0].subsampling_x, - xd->plane[0].subsampling_y)); - const int max_blocks_wide = max_block_wide(xd, bsize, 0); - 
const int max_blocks_high = max_block_high(xd, bsize, 0); - const BLOCK_SIZE max_unit_bsize = BLOCK_64X64; - int mu_blocks_wide = block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0]; - int mu_blocks_high = block_size_high[max_unit_bsize] >> tx_size_high_log2[0]; - mu_blocks_wide = AOMMIN(max_blocks_wide, mu_blocks_wide); - mu_blocks_high = AOMMIN(max_blocks_high, mu_blocks_high); - - for (row = 0; row < max_blocks_high; row += mu_blocks_high) { - for (col = 0; col < max_blocks_wide; col += mu_blocks_wide) { - for (int plane = 0; plane < num_planes; ++plane) { - const struct macroblockd_plane *const pd = &xd->plane[plane]; - if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x, - pd->subsampling_y)) - continue; - const TX_SIZE tx_size = av1_get_tx_size(plane, xd); - const int stepr = tx_size_high_unit[tx_size]; - const int stepc = tx_size_wide_unit[tx_size]; - const int step = stepr * stepc; - - const int unit_height = ROUND_POWER_OF_TWO( - AOMMIN(mu_blocks_high + row, max_blocks_high), pd->subsampling_y); - const int unit_width = ROUND_POWER_OF_TWO( - AOMMIN(mu_blocks_wide + col, max_blocks_wide), pd->subsampling_x); - for (int blk_row = row >> pd->subsampling_y; blk_row < unit_height; - blk_row += stepr) { - for (int blk_col = col >> pd->subsampling_x; blk_col < unit_width; - blk_col += stepc) { - write_coeffs_txb_wrap(cm, x, w, plane, block[plane], blk_row, - blk_col, tx_size); - block[plane] += step; - } - } - } - } - } -} - -// TODO(angiebird): use this function whenever it's possible -static int get_tx_type_cost(const AV1_COMMON *cm, const MACROBLOCK *x, - const MACROBLOCKD *xd, int plane, TX_SIZE tx_size, - TX_TYPE tx_type) { - if (plane > 0) return 0; - - const TX_SIZE square_tx_size = txsize_sqr_map[tx_size]; - - const MB_MODE_INFO *mbmi = xd->mi[0]; - const int is_inter = is_inter_block(mbmi); - if (get_ext_tx_types(tx_size, is_inter, cm->reduced_tx_set_used) > 1 && - !xd->lossless[xd->mi[0]->segment_id]) { - const int ext_tx_set = - 
get_ext_tx_set(tx_size, is_inter, cm->reduced_tx_set_used); - if (is_inter) { - if (ext_tx_set > 0) - return x->inter_tx_type_costs[ext_tx_set][square_tx_size][tx_type]; - } else { - if (ext_tx_set > 0) { - PREDICTION_MODE intra_dir; - if (mbmi->filter_intra_mode_info.use_filter_intra) - intra_dir = fimode_to_intradir[mbmi->filter_intra_mode_info - .filter_intra_mode]; - else - intra_dir = mbmi->mode; - return x->intra_tx_type_costs[ext_tx_set][square_tx_size][intra_dir] - [tx_type]; - } - } - } - return 0; -} - -static AOM_FORCE_INLINE int warehouse_efficients_txb( - const AV1_COMMON *const cm, const MACROBLOCK *x, const int plane, - const int block, const TX_SIZE tx_size, const TXB_CTX *const txb_ctx, - const struct macroblock_plane *p, const int eob, - const PLANE_TYPE plane_type, const LV_MAP_COEFF_COST *const coeff_costs, - const MACROBLOCKD *const xd, const TX_TYPE tx_type, - const TX_CLASS tx_class) { - const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); - const int txb_skip_ctx = txb_ctx->txb_skip_ctx; - const int bwl = get_txb_bwl(tx_size); - const int width = get_txb_wide(tx_size); - const int height = get_txb_high(tx_size); - const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type); - const int16_t *const scan = scan_order->scan; - uint8_t levels_buf[TX_PAD_2D]; - uint8_t *const levels = set_levels(levels_buf, width); - DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]); - const int eob_multi_size = txsize_log2_minus4[tx_size]; - const LV_MAP_EOB_COST *const eob_costs = - &x->eob_costs[eob_multi_size][plane_type]; - int cost = coeff_costs->txb_skip_cost[txb_skip_ctx][0]; - - av1_txb_init_levels(qcoeff, width, height, levels); - - cost += get_tx_type_cost(cm, x, xd, plane, tx_size, tx_type); - - cost += get_eob_cost(eob, eob_costs, coeff_costs, tx_class); - - av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, coeff_contexts); - - const int(*lps_cost)[COEFF_BASE_RANGE + 1] = coeff_costs->lps_cost; - int c = eob - 1; - 
{ - const int pos = scan[c]; - const tran_low_t v = qcoeff[pos]; - const int sign = v >> 31; - const int level = (v ^ sign) - sign; - const int coeff_ctx = coeff_contexts[pos]; - cost += coeff_costs->base_eob_cost[coeff_ctx][AOMMIN(level, 3) - 1]; - - if (v) { - // sign bit cost - if (level > NUM_BASE_LEVELS) { - const int ctx = get_br_ctx(levels, pos, bwl, tx_class); - const int base_range = - AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE); - cost += lps_cost[ctx][base_range]; - cost += get_golomb_cost(level); - } - if (c) { - cost += av1_cost_literal(1); - } else { - const int sign01 = (sign ^ sign) - sign; - const int dc_sign_ctx = txb_ctx->dc_sign_ctx; - cost += coeff_costs->dc_sign_cost[dc_sign_ctx][sign01]; - return cost; - } - } - } - const int(*base_cost)[4] = coeff_costs->base_cost; - for (c = eob - 2; c >= 1; --c) { - const int pos = scan[c]; - const int coeff_ctx = coeff_contexts[pos]; - const tran_low_t v = qcoeff[pos]; - const int level = abs(v); - const int cost0 = base_cost[coeff_ctx][AOMMIN(level, 3)]; - if (v) { - // sign bit cost - cost += av1_cost_literal(1); - if (level > NUM_BASE_LEVELS) { - const int ctx = get_br_ctx(levels, pos, bwl, tx_class); - const int base_range = - AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE); - cost += lps_cost[ctx][base_range]; - cost += get_golomb_cost(level); - } - } - cost += cost0; - } - if (c == 0) { - const int pos = scan[c]; - const tran_low_t v = qcoeff[pos]; - const int coeff_ctx = coeff_contexts[pos]; - const int sign = v >> 31; - const int level = (v ^ sign) - sign; - cost += base_cost[coeff_ctx][AOMMIN(level, 3)]; - - if (v) { - // sign bit cost - const int sign01 = (sign ^ sign) - sign; - const int dc_sign_ctx = txb_ctx->dc_sign_ctx; - cost += coeff_costs->dc_sign_cost[dc_sign_ctx][sign01]; - if (level > NUM_BASE_LEVELS) { - const int ctx = get_br_ctx(levels, pos, bwl, tx_class); - const int base_range = - AOMMIN(level - 1 - NUM_BASE_LEVELS, COEFF_BASE_RANGE); - cost += 
lps_cost[ctx][base_range]; - cost += get_golomb_cost(level); - } - } - } - return cost; -} - -int av1_cost_coeffs_txb(const AV1_COMMON *const cm, const MACROBLOCK *x, - const int plane, const int block, const TX_SIZE tx_size, - const TX_TYPE tx_type, const TXB_CTX *const txb_ctx) { - const struct macroblock_plane *p = &x->plane[plane]; - const int eob = p->eobs[block]; - const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size); - const PLANE_TYPE plane_type = get_plane_type(plane); - const LV_MAP_COEFF_COST *const coeff_costs = - &x->coeff_costs[txs_ctx][plane_type]; - if (eob == 0) { - return coeff_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][1]; - } - - const MACROBLOCKD *const xd = &x->e_mbd; - const TX_CLASS tx_class = tx_type_to_class[tx_type]; - -#define WAREHOUSE_EFFICIENTS_TXB_CASE(tx_class_literal) \ - case tx_class_literal: \ - return warehouse_efficients_txb(cm, x, plane, block, tx_size, txb_ctx, p, \ - eob, plane_type, coeff_costs, xd, tx_type, \ - tx_class_literal); - switch (tx_class) { - WAREHOUSE_EFFICIENTS_TXB_CASE(TX_CLASS_2D); - WAREHOUSE_EFFICIENTS_TXB_CASE(TX_CLASS_HORIZ); - WAREHOUSE_EFFICIENTS_TXB_CASE(TX_CLASS_VERT); -#undef WAREHOUSE_EFFICIENTS_TXB_CASE - default: assert(false); return 0; - } -} - -static int optimize_txb(TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs, - const LV_MAP_EOB_COST *txb_eob_costs, int *rate_cost) { - int update = 0; - if (txb_info->eob == 0) return update; - const int16_t *const scan = txb_info->scan_order->scan; - // forward optimize the nz_map` - const int init_eob = txb_info->eob; - const TX_CLASS tx_class = tx_type_to_class[txb_info->tx_type]; - const int eob_cost = - get_eob_cost(init_eob, txb_eob_costs, txb_costs, tx_class); - - // backward optimize the level-k map - int accu_rate = eob_cost; - int64_t accu_dist = 0; - int64_t prev_eob_rd_cost = INT64_MAX; - int64_t cur_eob_rd_cost = 0; - - { - const int si = init_eob - 1; - const int coeff_idx = scan[si]; - LevelDownStats stats; - 
get_dist_cost_stats(&stats, si, si == init_eob - 1, txb_costs, txb_info, - tx_class); - if ((stats.rd_low < stats.rd) && (stats.low_qc != 0)) { - update = 1; - update_coeff(coeff_idx, stats.low_qc, txb_info); - accu_rate += stats.rate_low; - accu_dist += stats.dist_low; - } else { - accu_rate += stats.rate; - accu_dist += stats.dist; - } - } - - int si = init_eob - 2; - int8_t has_nz_tail = 0; - // eob is not fixed - for (; si >= 0 && has_nz_tail < 2; --si) { - assert(si != init_eob - 1); - const int coeff_idx = scan[si]; - tran_low_t qc = txb_info->qcoeff[coeff_idx]; - - if (qc == 0) { - const int coeff_ctx = - get_lower_levels_ctx(txb_info->levels, coeff_idx, txb_info->bwl, - txb_info->tx_size, tx_class); - accu_rate += txb_costs->base_cost[coeff_ctx][0]; - } else { - LevelDownStats stats; - get_dist_cost_stats_with_eob(&stats, si, txb_costs, txb_info, tx_class); - // check if it is better to make this the last significant coefficient - int cur_eob_rate = - get_eob_cost(si + 1, txb_eob_costs, txb_costs, tx_class); - cur_eob_rd_cost = RDCOST(txb_info->rdmult, cur_eob_rate, 0); - prev_eob_rd_cost = - RDCOST(txb_info->rdmult, accu_rate, accu_dist) + stats.nz_rd; - if (cur_eob_rd_cost <= prev_eob_rd_cost) { - update = 1; - for (int j = si + 1; j < txb_info->eob; j++) { - const int coeff_pos_j = scan[j]; - update_coeff(coeff_pos_j, 0, txb_info); - } - txb_info->eob = si + 1; - - // rerun cost calculation due to change of eob - accu_rate = cur_eob_rate; - accu_dist = 0; - get_dist_cost_stats(&stats, si, 1, txb_costs, txb_info, tx_class); - if ((stats.rd_low < stats.rd) && (stats.low_qc != 0)) { - update = 1; - update_coeff(coeff_idx, stats.low_qc, txb_info); - accu_rate += stats.rate_low; - accu_dist += stats.dist_low; - } else { - accu_rate += stats.rate; - accu_dist += stats.dist; - } - - // reset non zero tail when new eob is found - has_nz_tail = 0; - } else { - int bUpdCoeff = 0; - if (stats.rd_low < stats.rd) { - if ((si < txb_info->eob - 1)) { - bUpdCoeff = 1; - 
update = 1; - } - } else { - ++has_nz_tail; - } - - if (bUpdCoeff) { - update_coeff(coeff_idx, stats.low_qc, txb_info); - accu_rate += stats.rate_low; - accu_dist += stats.dist_low; - } else { - accu_rate += stats.rate; - accu_dist += stats.dist; - } - } - } - } // for (si) - - // eob is fixed - for (; si >= 0; --si) { - assert(si != init_eob - 1); - const int coeff_idx = scan[si]; - tran_low_t qc = txb_info->qcoeff[coeff_idx]; - - if (qc == 0) { - const int coeff_ctx = - get_lower_levels_ctx(txb_info->levels, coeff_idx, txb_info->bwl, - txb_info->tx_size, tx_class); - accu_rate += txb_costs->base_cost[coeff_ctx][0]; - } else { - LevelDownStats stats; - get_dist_cost_stats(&stats, si, 0, txb_costs, txb_info, tx_class); - - int bUpdCoeff = 0; - if (stats.rd_low < stats.rd) { - if ((si < txb_info->eob - 1)) { - bUpdCoeff = 1; - update = 1; - } - } - if (bUpdCoeff) { - update_coeff(coeff_idx, stats.low_qc, txb_info); - accu_rate += stats.rate_low; - accu_dist += stats.dist_low; - } else { - accu_rate += stats.rate; - accu_dist += stats.dist; - } - } - } // for (si) - - int non_zero_blk_rate = - txb_costs->txb_skip_cost[txb_info->txb_ctx->txb_skip_ctx][0]; - prev_eob_rd_cost = - RDCOST(txb_info->rdmult, accu_rate + non_zero_blk_rate, accu_dist); - - int zero_blk_rate = - txb_costs->txb_skip_cost[txb_info->txb_ctx->txb_skip_ctx][1]; - int64_t zero_blk_rd_cost = RDCOST(txb_info->rdmult, zero_blk_rate, 0); - if (zero_blk_rd_cost <= prev_eob_rd_cost) { - update = 1; - for (int j = 0; j < txb_info->eob; j++) { - const int coeff_pos_j = scan[j]; - update_coeff(coeff_pos_j, 0, txb_info); - } - txb_info->eob = 0; - } - - // record total rate cost - *rate_cost = zero_blk_rd_cost <= prev_eob_rd_cost - ? zero_blk_rate - : accu_rate + non_zero_blk_rate; - - if (txb_info->eob > 0) { - *rate_cost += txb_info->tx_type_cost; - } - - return update; -} - -// These numbers are empirically obtained. 
-static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = { - { 17, 13 }, - { 16, 10 }, -}; - -void hbt_init() { - hbt_hash_table = - aom_malloc(sizeof(OptTxbQcoeff) * HBT_TABLE_SIZE * HBT_ARRAY_LENGTH); - memset(hbt_hash_table, 0, - sizeof(OptTxbQcoeff) * HBT_TABLE_SIZE * HBT_ARRAY_LENGTH); - av1_crc32c_calculator_init(&crc_calculator); // 31 bit: qc & ctx - - hbt_needs_init = 0; -} - -void hbt_destroy() { aom_free(hbt_hash_table); } - -int hbt_hash_miss(uint32_t hbt_ctx_hash, uint32_t hbt_qc_hash, - TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs, - const LV_MAP_EOB_COST *txb_eob_costs, - const struct macroblock_plane *p, int block, int fast_mode, - int *rate_cost) { - (void)fast_mode; - const int16_t *scan = txb_info->scan_order->scan; - int prev_eob = txb_info->eob; - assert(HBT_EOB <= 16); // Lengthen array if allowing longer eob. - int32_t prev_coeff[16]; - for (int i = 0; i < prev_eob; i++) { - prev_coeff[i] = txb_info->qcoeff[scan[i]]; - } - for (int i = prev_eob; i < HBT_EOB; i++) { - prev_coeff[i] = 0; // For compiler piece of mind. - } - - av1_txb_init_levels(txb_info->qcoeff, txb_info->width, txb_info->height, - txb_info->levels); - - const int update = - optimize_txb(txb_info, txb_costs, txb_eob_costs, rate_cost); - - // Overwrite old entry - uint16_t hbt_table_index = hbt_ctx_hash % HBT_TABLE_SIZE; - uint16_t hbt_array_index = hbt_qc_hash % HBT_ARRAY_LENGTH; - hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index] - .rate_cost = *rate_cost; - hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index].init = 1; - hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index] - .hbt_qc_hash = hbt_qc_hash; - hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index] - .hbt_ctx_hash = hbt_ctx_hash; - assert(prev_eob >= txb_info->eob); // eob can't get longer - for (int i = 0; i < txb_info->eob; i++) { - // Record how coeff changed. Convention: towards zero is negative. 
- if (txb_info->qcoeff[scan[i]] > 0) - hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index] - .deltas[i] = txb_info->qcoeff[scan[i]] - prev_coeff[i]; - else - hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index] - .deltas[i] = prev_coeff[i] - txb_info->qcoeff[scan[i]]; - } - for (int i = txb_info->eob; i < prev_eob; i++) { - // If eob got shorter, record that all after it changed to zero. - if (prev_coeff[i] > 0) - hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index] - .deltas[i] = -prev_coeff[i]; - else - hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index] - .deltas[i] = prev_coeff[i]; - } - for (int i = prev_eob; i < HBT_EOB; i++) { - // Record 'no change' after optimized coefficients run out. - hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index] - .deltas[i] = 0; - } - - if (update) { - p->eobs[block] = txb_info->eob; - p->txb_entropy_ctx[block] = av1_get_txb_entropy_context( - txb_info->qcoeff, txb_info->scan_order, txb_info->eob); - } - return txb_info->eob; -} - -int hbt_hash_hit(uint32_t hbt_table_index, int hbt_array_index, - TxbInfo *txb_info, const struct macroblock_plane *p, int block, - int *rate_cost) { - const int16_t *scan = txb_info->scan_order->scan; - int new_eob = 0; - int update = 0; - - for (int i = 0; i < txb_info->eob; i++) { - // Delta convention is negatives go towards zero, so only apply those ones. 
- if (hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index] - .deltas[i] < 0) { - if (txb_info->qcoeff[scan[i]] > 0) - txb_info->qcoeff[scan[i]] += - hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index] - .deltas[i]; - else - txb_info->qcoeff[scan[i]] -= - hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index] - .deltas[i]; - - update = 1; - update_coeff(scan[i], txb_info->qcoeff[scan[i]], txb_info); - } - if (txb_info->qcoeff[scan[i]]) new_eob = i + 1; - } - - // Rate_cost can be calculated here instead (av1_cost_coeffs_txb), but - // it is expensive and gives little benefit as long as qc_hash is high bit - *rate_cost = - hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index] - .rate_cost; - - if (update) { - txb_info->eob = new_eob; - p->eobs[block] = txb_info->eob; - p->txb_entropy_ctx[block] = av1_get_txb_entropy_context( - txb_info->qcoeff, txb_info->scan_order, txb_info->eob); - } - - return txb_info->eob; -} - -int hbt_search_match(uint32_t hbt_ctx_hash, uint32_t hbt_qc_hash, - TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs, - const LV_MAP_EOB_COST *txb_eob_costs, - const struct macroblock_plane *p, int block, int fast_mode, - int *rate_cost) { - // Check for qcoeff match - int hbt_array_index = hbt_qc_hash % HBT_ARRAY_LENGTH; - int hbt_table_index = hbt_ctx_hash % HBT_TABLE_SIZE; - - if (hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index] - .hbt_qc_hash == hbt_qc_hash && - hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index] - .hbt_ctx_hash == hbt_ctx_hash && - hbt_hash_table[hbt_table_index * HBT_ARRAY_LENGTH + hbt_array_index] - .init) { - return hbt_hash_hit(hbt_table_index, hbt_array_index, txb_info, p, block, - rate_cost); - } else { - return hbt_hash_miss(hbt_ctx_hash, hbt_qc_hash, txb_info, txb_costs, - txb_eob_costs, p, block, fast_mode, rate_cost); - } -} - -int hbt_create_hashes(TxbInfo *txb_info, const LV_MAP_COEFF_COST *txb_costs, - const 
LV_MAP_EOB_COST *txb_eob_costs, - const struct macroblock_plane *p, int block, - int fast_mode, int *rate_cost) { - // Initialize hash table if needed. - if (hbt_needs_init) { - hbt_init(); - } - - //// Hash creation - uint8_t txb_hash_data[256]; // Asserts below to ensure enough space. - const int16_t *scan = txb_info->scan_order->scan; - uint8_t chunk = 0; - int hash_data_index = 0; - - // Make qc_hash. - int packing_index = 0; // needed for packing. - for (int i = 0; i < txb_info->eob; i++) { - tran_low_t prechunk = txb_info->qcoeff[scan[i]]; - - // Softening: Improves speed. Aligns with signed deltas. - if (prechunk < 0) prechunk *= -1; - - // Early kick out: Don't apply feature if there are large coeffs: - // If this kickout value is removed or raised beyond int8_t, - // widen deltas type in OptTxbQcoeff struct. - assert((int8_t)HBT_KICKOUT == HBT_KICKOUT); // If not, widen types. - if (prechunk > HBT_KICKOUT) { - av1_txb_init_levels(txb_info->qcoeff, txb_info->width, txb_info->height, - txb_info->levels); - - const int update = - optimize_txb(txb_info, txb_costs, txb_eob_costs, rate_cost); - - if (update) { - p->eobs[block] = txb_info->eob; - p->txb_entropy_ctx[block] = av1_get_txb_entropy_context( - txb_info->qcoeff, txb_info->scan_order, txb_info->eob); - } - return txb_info->eob; - } - - // Since coeffs are 0 to 3, only 2 bits are needed: pack into bytes - if (packing_index == 0) txb_hash_data[hash_data_index] = 0; - chunk = prechunk << packing_index; - packing_index += 2; - txb_hash_data[hash_data_index] |= chunk; - - // Full byte: - if (packing_index == 8) { - packing_index = 0; - hash_data_index++; - } - } - // Needed when packing_index != 0, to include final byte. - hash_data_index++; - assert(hash_data_index <= 64); - // 31 bit qc_hash: index to array - uint32_t hbt_qc_hash = - av1_get_crc32c_value(&crc_calculator, txb_hash_data, hash_data_index); - - // Make ctx_hash. 
- hash_data_index = 0; - tran_low_t prechunk; - - for (int i = 0; i < txb_info->eob; i++) { - // Save as magnitudes towards or away from zero. - if (txb_info->tcoeff[scan[i]] >= 0) - prechunk = txb_info->tcoeff[scan[i]] - txb_info->dqcoeff[scan[i]]; - else - prechunk = txb_info->dqcoeff[scan[i]] - txb_info->tcoeff[scan[i]]; - - chunk = prechunk & 0xff; - txb_hash_data[hash_data_index++] = chunk; - } - - // Extra ctx data: - // Include dequants. - txb_hash_data[hash_data_index++] = txb_info->dequant[0] & 0xff; - txb_hash_data[hash_data_index++] = txb_info->dequant[1] & 0xff; - chunk = txb_info->txb_ctx->txb_skip_ctx & 0xff; - txb_hash_data[hash_data_index++] = chunk; - chunk = txb_info->txb_ctx->dc_sign_ctx & 0xff; - txb_hash_data[hash_data_index++] = chunk; - // eob - chunk = txb_info->eob & 0xff; - txb_hash_data[hash_data_index++] = chunk; - // rdmult (int64) - chunk = txb_info->rdmult & 0xff; - txb_hash_data[hash_data_index++] = chunk; - // tx_type - chunk = txb_info->tx_type & 0xff; - txb_hash_data[hash_data_index++] = chunk; - // base_eob_cost - for (int i = 1; i < 3; i++) { // i = 0 are softened away - for (int j = 0; j < SIG_COEF_CONTEXTS_EOB; j++) { - chunk = (txb_costs->base_eob_cost[j][i] & 0xff00) >> 8; - txb_hash_data[hash_data_index++] = chunk; - } - } - // eob_cost - for (int i = 0; i < 11; i++) { - for (int j = 0; j < 2; j++) { - chunk = (txb_eob_costs->eob_cost[j][i] & 0xff00) >> 8; - txb_hash_data[hash_data_index++] = chunk; - } - } - // dc_sign_cost - for (int i = 0; i < 2; i++) { - for (int j = 0; j < DC_SIGN_CONTEXTS; j++) { - chunk = (txb_costs->dc_sign_cost[j][i] & 0xff00) >> 8; - txb_hash_data[hash_data_index++] = chunk; - } - } - - assert(hash_data_index <= 256); - // 31 bit ctx_hash: used to index table - uint32_t hbt_ctx_hash = - av1_get_crc32c_value(&crc_calculator, txb_hash_data, hash_data_index); - //// End hash creation - - return hbt_search_match(hbt_ctx_hash, hbt_qc_hash, txb_info, txb_costs, - txb_eob_costs, p, block, fast_mode, 
rate_cost); -} - -static AOM_FORCE_INLINE int get_coeff_cost_simple( - int ci, tran_low_t abs_qc, int coeff_ctx, - const LV_MAP_COEFF_COST *txb_costs, int bwl, TX_CLASS tx_class, - const uint8_t *levels) { - // this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0) - // and not the last (scan_idx != eob - 1) - assert(ci > 0); - int cost = txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)]; - if (abs_qc) { - cost += av1_cost_literal(1); - if (abs_qc > NUM_BASE_LEVELS) { - const int br_ctx = get_br_ctx(levels, ci, bwl, tx_class); - cost += get_br_cost(abs_qc, br_ctx, txb_costs->lps_cost[br_ctx]); - cost += get_golomb_cost(abs_qc); - } - } - return cost; -} - -static INLINE int get_coeff_cost_general(int is_last, int ci, tran_low_t abs_qc, - int sign, int coeff_ctx, - int dc_sign_ctx, - const LV_MAP_COEFF_COST *txb_costs, - int bwl, TX_CLASS tx_class, - const uint8_t *levels) { - int cost = 0; - if (is_last) { - cost += txb_costs->base_eob_cost[coeff_ctx][AOMMIN(abs_qc, 3) - 1]; - } else { - cost += txb_costs->base_cost[coeff_ctx][AOMMIN(abs_qc, 3)]; - } - if (abs_qc != 0) { - if (ci == 0) { - cost += txb_costs->dc_sign_cost[dc_sign_ctx][sign]; - } else { - cost += av1_cost_literal(1); - } - if (abs_qc > NUM_BASE_LEVELS) { - const int br_ctx = get_br_ctx(levels, ci, bwl, tx_class); - cost += get_br_cost(abs_qc, br_ctx, txb_costs->lps_cost[br_ctx]); - cost += get_golomb_cost(abs_qc); - } - } - return cost; -} - -static INLINE void get_qc_dqc_low(tran_low_t abs_qc, int sign, int dqv, - int shift, tran_low_t *qc_low, - tran_low_t *dqc_low) { - tran_low_t abs_qc_low = abs_qc - 1; - *qc_low = (-sign ^ abs_qc_low) + sign; - assert((sign ? -abs_qc_low : abs_qc_low) == *qc_low); - tran_low_t abs_dqc_low = (abs_qc_low * dqv) >> shift; - *dqc_low = (-sign ^ abs_dqc_low) + sign; - assert((sign ? 
-abs_dqc_low : abs_dqc_low) == *dqc_low); -} - -static INLINE void update_coeff_general( - int *accu_rate, int64_t *accu_dist, int si, int eob, TX_SIZE tx_size, - TX_CLASS tx_class, int bwl, int height, int64_t rdmult, int shift, - int dc_sign_ctx, const int16_t *dequant, const int16_t *scan, - const LV_MAP_COEFF_COST *txb_costs, const tran_low_t *tcoeff, - tran_low_t *qcoeff, tran_low_t *dqcoeff, uint8_t *levels) { - const int dqv = dequant[si != 0]; - const int ci = scan[si]; - const tran_low_t qc = qcoeff[ci]; - const int is_last = si == (eob - 1); - const int coeff_ctx = get_lower_levels_ctx_general( - is_last, si, bwl, height, levels, ci, tx_size, tx_class); - if (qc == 0) { - *accu_rate += txb_costs->base_cost[coeff_ctx][0]; - } else { - const int sign = (qc < 0) ? 1 : 0; - const tran_low_t abs_qc = abs(qc); - const tran_low_t tqc = tcoeff[ci]; - const tran_low_t dqc = dqcoeff[ci]; - const int64_t dist = get_coeff_dist(tqc, dqc, shift); - const int64_t dist0 = get_coeff_dist(tqc, 0, shift); - const int rate = - get_coeff_cost_general(is_last, ci, abs_qc, sign, coeff_ctx, - dc_sign_ctx, txb_costs, bwl, tx_class, levels); - const int64_t rd = RDCOST(rdmult, rate, dist); - - tran_low_t qc_low, dqc_low; - get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low); - const tran_low_t abs_qc_low = abs_qc - 1; - const int64_t dist_low = get_coeff_dist(tqc, dqc_low, shift); - const int rate_low = - get_coeff_cost_general(is_last, ci, abs_qc_low, sign, coeff_ctx, - dc_sign_ctx, txb_costs, bwl, tx_class, levels); - const int64_t rd_low = RDCOST(rdmult, rate_low, dist_low); - if (rd_low < rd) { - qcoeff[ci] = qc_low; - dqcoeff[ci] = dqc_low; - levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX); - *accu_rate += rate_low; - *accu_dist += dist_low - dist0; - } else { - *accu_rate += rate; - *accu_dist += dist - dist0; - } - } -} - -static AOM_FORCE_INLINE void update_coeff_simple( - int *accu_rate, int si, int eob, TX_SIZE tx_size, TX_CLASS tx_class, - int 
bwl, int64_t rdmult, int shift, const int16_t *dequant, - const int16_t *scan, const LV_MAP_COEFF_COST *txb_costs, - const tran_low_t *tcoeff, tran_low_t *qcoeff, tran_low_t *dqcoeff, - uint8_t *levels) { - const int dqv = dequant[1]; - (void)eob; - // this simple version assumes the coeff's scan_idx is not DC (scan_idx != 0) - // and not the last (scan_idx != eob - 1) - assert(si != eob - 1); - assert(si > 0); - const int ci = scan[si]; - const tran_low_t qc = qcoeff[ci]; - const int coeff_ctx = - get_lower_levels_ctx(levels, ci, bwl, tx_size, tx_class); - if (qc == 0) { - *accu_rate += txb_costs->base_cost[coeff_ctx][0]; - } else { - const tran_low_t abs_qc = abs(qc); - const tran_low_t tqc = tcoeff[ci]; - const tran_low_t dqc = dqcoeff[ci]; - const int rate = get_coeff_cost_simple(ci, abs_qc, coeff_ctx, txb_costs, - bwl, tx_class, levels); - if (abs(dqc) < abs(tqc)) { - *accu_rate += rate; - return; - } - const int64_t dist = get_coeff_dist(tqc, dqc, shift); - const int64_t rd = RDCOST(rdmult, rate, dist); - - const int sign = (qc < 0) ? 
1 : 0; - tran_low_t qc_low, dqc_low; - get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low); - const tran_low_t abs_qc_low = abs_qc - 1; - const int64_t dist_low = get_coeff_dist(tqc, dqc_low, shift); - const int rate_low = get_coeff_cost_simple( - ci, abs_qc_low, coeff_ctx, txb_costs, bwl, tx_class, levels); - const int64_t rd_low = RDCOST(rdmult, rate_low, dist_low); - if (rd_low < rd) { - qcoeff[ci] = qc_low; - dqcoeff[ci] = dqc_low; - levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX); - *accu_rate += rate_low; - } else { - *accu_rate += rate; - } - } -} - -static AOM_FORCE_INLINE void update_coeff_eob( - int *accu_rate, int64_t *accu_dist, int *eob, int *nz_num, int *nz_ci, - int si, TX_SIZE tx_size, TX_CLASS tx_class, int bwl, int height, - int dc_sign_ctx, int64_t rdmult, int shift, const int16_t *dequant, - const int16_t *scan, const LV_MAP_EOB_COST *txb_eob_costs, - const LV_MAP_COEFF_COST *txb_costs, const tran_low_t *tcoeff, - tran_low_t *qcoeff, tran_low_t *dqcoeff, uint8_t *levels, int sharpness) { - const int dqv = dequant[si != 0]; - assert(si != *eob - 1); - const int ci = scan[si]; - const tran_low_t qc = qcoeff[ci]; - const int coeff_ctx = - get_lower_levels_ctx(levels, ci, bwl, tx_size, tx_class); - if (qc == 0) { - *accu_rate += txb_costs->base_cost[coeff_ctx][0]; - } else { - int lower_level = 0; - const tran_low_t abs_qc = abs(qc); - const tran_low_t tqc = tcoeff[ci]; - const tran_low_t dqc = dqcoeff[ci]; - const int sign = (qc < 0) ? 
1 : 0; - const int64_t dist0 = get_coeff_dist(tqc, 0, shift); - int64_t dist = get_coeff_dist(tqc, dqc, shift) - dist0; - int rate = - get_coeff_cost_general(0, ci, abs_qc, sign, coeff_ctx, dc_sign_ctx, - txb_costs, bwl, tx_class, levels); - int64_t rd = RDCOST(rdmult, *accu_rate + rate, *accu_dist + dist); - - tran_low_t qc_low, dqc_low; - get_qc_dqc_low(abs_qc, sign, dqv, shift, &qc_low, &dqc_low); - const tran_low_t abs_qc_low = abs_qc - 1; - const int64_t dist_low = get_coeff_dist(tqc, dqc_low, shift) - dist0; - const int rate_low = - get_coeff_cost_general(0, ci, abs_qc_low, sign, coeff_ctx, dc_sign_ctx, - txb_costs, bwl, tx_class, levels); - const int64_t rd_low = - RDCOST(rdmult, *accu_rate + rate_low, *accu_dist + dist_low); - - int lower_level_new_eob = 0; - const int new_eob = si + 1; - uint8_t tmp_levels[3]; - for (int ni = 0; ni < *nz_num; ++ni) { - const int last_ci = nz_ci[ni]; - tmp_levels[ni] = levels[get_padded_idx(last_ci, bwl)]; - levels[get_padded_idx(last_ci, bwl)] = 0; - } - - const int coeff_ctx_new_eob = get_lower_levels_ctx_general( - 1, si, bwl, height, levels, ci, tx_size, tx_class); - const int new_eob_cost = - get_eob_cost(new_eob, txb_eob_costs, txb_costs, tx_class); - int rate_coeff_eob = - new_eob_cost + get_coeff_cost_general(1, ci, abs_qc, sign, - coeff_ctx_new_eob, dc_sign_ctx, - txb_costs, bwl, tx_class, levels); - int64_t dist_new_eob = dist; - int64_t rd_new_eob = RDCOST(rdmult, rate_coeff_eob, dist_new_eob); - - if (abs_qc_low > 0) { - const int rate_coeff_eob_low = - new_eob_cost + - get_coeff_cost_general(1, ci, abs_qc_low, sign, coeff_ctx_new_eob, - dc_sign_ctx, txb_costs, bwl, tx_class, levels); - const int64_t dist_new_eob_low = dist_low; - const int64_t rd_new_eob_low = - RDCOST(rdmult, rate_coeff_eob_low, dist_new_eob_low); - if (rd_new_eob_low < rd_new_eob) { - lower_level_new_eob = 1; - rd_new_eob = rd_new_eob_low; - rate_coeff_eob = rate_coeff_eob_low; - dist_new_eob = dist_new_eob_low; - } - } - - if (rd_low < rd) { 
- lower_level = 1; - rd = rd_low; - rate = rate_low; - dist = dist_low; - } - - if (sharpness == 0 && rd_new_eob < rd) { - for (int ni = 0; ni < *nz_num; ++ni) { - int last_ci = nz_ci[ni]; - // levels[get_padded_idx(last_ci, bwl)] = 0; - qcoeff[last_ci] = 0; - dqcoeff[last_ci] = 0; - } - *eob = new_eob; - *nz_num = 0; - *accu_rate = rate_coeff_eob; - *accu_dist = dist_new_eob; - lower_level = lower_level_new_eob; - } else { - for (int ni = 0; ni < *nz_num; ++ni) { - const int last_ci = nz_ci[ni]; - levels[get_padded_idx(last_ci, bwl)] = tmp_levels[ni]; - } - *accu_rate += rate; - *accu_dist += dist; - } - - if (lower_level) { - qcoeff[ci] = qc_low; - dqcoeff[ci] = dqc_low; - levels[get_padded_idx(ci, bwl)] = AOMMIN(abs_qc_low, INT8_MAX); - } - if (qcoeff[ci]) { - nz_ci[*nz_num] = ci; - ++*nz_num; - } - } -} - -static INLINE void update_skip(int *accu_rate, int64_t accu_dist, int *eob, - int nz_num, int *nz_ci, int64_t rdmult, - int skip_cost, int non_skip_cost, - tran_low_t *qcoeff, tran_low_t *dqcoeff, - int sharpness) { - const int64_t rd = RDCOST(rdmult, *accu_rate + non_skip_cost, accu_dist); - const int64_t rd_new_eob = RDCOST(rdmult, skip_cost, 0); - if (sharpness == 0 && rd_new_eob < rd) { - for (int i = 0; i < nz_num; ++i) { - const int ci = nz_ci[i]; - qcoeff[ci] = 0; - dqcoeff[ci] = 0; - // no need to set up levels because this is the last step - // levels[get_padded_idx(ci, bwl)] = 0; - } - *accu_rate = 0; - *eob = 0; - } -} - -int av1_optimize_txb_new(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane, - int block, TX_SIZE tx_size, TX_TYPE tx_type, - const TXB_CTX *const txb_ctx, int *rate_cost, - int sharpness) { - const AV1_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - const PLANE_TYPE plane_type = get_plane_type(plane); - const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size); - const TX_CLASS tx_class = tx_type_to_class[tx_type]; - const MB_MODE_INFO *mbmi = xd->mi[0]; - const struct macroblock_plane *p = &x->plane[plane]; - struct 
macroblockd_plane *pd = &xd->plane[plane]; - tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); - tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); - const tran_low_t *tcoeff = BLOCK_OFFSET(p->coeff, block); - const int16_t *dequant = p->dequant_QTX; - const int bwl = get_txb_bwl(tx_size); - const int width = get_txb_wide(tx_size); - const int height = get_txb_high(tx_size); - assert(width == (1 << bwl)); - const int is_inter = is_inter_block(mbmi); - const SCAN_ORDER *scan_order = get_scan(tx_size, tx_type); - const int16_t *scan = scan_order->scan; - const LV_MAP_COEFF_COST *txb_costs = &x->coeff_costs[txs_ctx][plane_type]; - const int eob_multi_size = txsize_log2_minus4[tx_size]; - const LV_MAP_EOB_COST *txb_eob_costs = - &x->eob_costs[eob_multi_size][plane_type]; - - const int shift = av1_get_tx_scale(tx_size); - const int64_t rdmult = - ((x->rdmult * plane_rd_mult[is_inter][plane_type] << (2 * (xd->bd - 8))) + - 2) >> - (sharpness + - (cpi->oxcf.aq_mode == VARIANCE_AQ && mbmi->segment_id < 4 - ? 7 - mbmi->segment_id - : 2) + - (cpi->oxcf.aq_mode != VARIANCE_AQ && - cpi->oxcf.deltaq_mode > NO_DELTA_Q && x->sb_energy_level < 0 - ? 
(3 - x->sb_energy_level) - : 0)); - - uint8_t levels_buf[TX_PAD_2D]; - uint8_t *const levels = set_levels(levels_buf, width); - - av1_txb_init_levels(qcoeff, width, height, levels); - - // TODO(angirbird): check iqmatrix - - const int non_skip_cost = txb_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][0]; - const int skip_cost = txb_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][1]; - int eob = p->eobs[block]; - const int eob_cost = get_eob_cost(eob, txb_eob_costs, txb_costs, tx_class); - int accu_rate = eob_cost; - int64_t accu_dist = 0; - int si = eob - 1; - const int ci = scan[si]; - const tran_low_t qc = qcoeff[ci]; - const tran_low_t abs_qc = abs(qc); - const int sign = qc < 0; - const int max_nz_num = 2; - int nz_num = 1; - int nz_ci[3] = { ci, 0, 0 }; - if (abs_qc >= 2) { - update_coeff_general(&accu_rate, &accu_dist, si, eob, tx_size, tx_class, - bwl, height, rdmult, shift, txb_ctx->dc_sign_ctx, - dequant, scan, txb_costs, tcoeff, qcoeff, dqcoeff, - levels); - --si; - } else { - assert(abs_qc == 1); - const int coeff_ctx = get_lower_levels_ctx_general( - 1, si, bwl, height, levels, ci, tx_size, tx_class); - accu_rate += get_coeff_cost_general(1, ci, abs_qc, sign, coeff_ctx, - txb_ctx->dc_sign_ctx, txb_costs, bwl, - tx_class, levels); - const tran_low_t tqc = tcoeff[ci]; - const tran_low_t dqc = dqcoeff[ci]; - const int64_t dist = get_coeff_dist(tqc, dqc, shift); - const int64_t dist0 = get_coeff_dist(tqc, 0, shift); - accu_dist += dist - dist0; - --si; - } - -#define UPDATE_COEFF_EOB_CASE(tx_class_literal) \ - case tx_class_literal: \ - for (; si >= 0 && nz_num <= max_nz_num; --si) { \ - update_coeff_eob(&accu_rate, &accu_dist, &eob, &nz_num, nz_ci, si, \ - tx_size, tx_class_literal, bwl, height, \ - txb_ctx->dc_sign_ctx, rdmult, shift, dequant, scan, \ - txb_eob_costs, txb_costs, tcoeff, qcoeff, dqcoeff, \ - levels, sharpness); \ - } \ - break; - switch (tx_class) { - UPDATE_COEFF_EOB_CASE(TX_CLASS_2D); - UPDATE_COEFF_EOB_CASE(TX_CLASS_HORIZ); - 
UPDATE_COEFF_EOB_CASE(TX_CLASS_VERT); -#undef UPDATE_COEFF_EOB_CASE - default: assert(false); - } - - if (si == -1 && nz_num <= max_nz_num) { - update_skip(&accu_rate, accu_dist, &eob, nz_num, nz_ci, rdmult, skip_cost, - non_skip_cost, qcoeff, dqcoeff, sharpness); - } - -#define UPDATE_COEFF_SIMPLE_CASE(tx_class_literal) \ - case tx_class_literal: \ - for (; si >= 1; --si) { \ - update_coeff_simple(&accu_rate, si, eob, tx_size, tx_class_literal, bwl, \ - rdmult, shift, dequant, scan, txb_costs, tcoeff, \ - qcoeff, dqcoeff, levels); \ - } \ - break; - switch (tx_class) { - UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_2D); - UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_HORIZ); - UPDATE_COEFF_SIMPLE_CASE(TX_CLASS_VERT); -#undef UPDATE_COEFF_SIMPLE_CASE - default: assert(false); - } - - // DC position - if (si == 0) { - // no need to update accu_dist because it's not used after this point - int64_t dummy_dist = 0; - update_coeff_general(&accu_rate, &dummy_dist, si, eob, tx_size, tx_class, - bwl, height, rdmult, shift, txb_ctx->dc_sign_ctx, - dequant, scan, txb_costs, tcoeff, qcoeff, dqcoeff, - levels); - } - - const int tx_type_cost = get_tx_type_cost(cm, x, xd, plane, tx_size, tx_type); - if (eob == 0) - accu_rate += skip_cost; - else - accu_rate += non_skip_cost + tx_type_cost; - - p->eobs[block] = eob; - p->txb_entropy_ctx[block] = - av1_get_txb_entropy_context(qcoeff, scan_order, p->eobs[block]); - - *rate_cost = accu_rate; - return eob; -} - -// This function is deprecated, but we keep it here because hash trellis -// is not integrated with av1_optimize_txb_new yet -int av1_optimize_txb(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane, - int blk_row, int blk_col, int block, TX_SIZE tx_size, - TXB_CTX *txb_ctx, int fast_mode, int *rate_cost) { - const AV1_COMMON *cm = &cpi->common; - MACROBLOCKD *const xd = &x->e_mbd; - const PLANE_TYPE plane_type = get_plane_type(plane); - const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size); - const TX_TYPE tx_type = av1_get_tx_type(plane_type, 
xd, blk_row, blk_col, - tx_size, cm->reduced_tx_set_used); - const MB_MODE_INFO *mbmi = xd->mi[0]; - const struct macroblock_plane *p = &x->plane[plane]; - struct macroblockd_plane *pd = &xd->plane[plane]; - const int eob = p->eobs[block]; - tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); - tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); - const tran_low_t *tcoeff = BLOCK_OFFSET(p->coeff, block); - const int16_t *dequant = p->dequant_QTX; - const int seg_eob = av1_get_max_eob(tx_size); - const int bwl = get_txb_bwl(tx_size); - const int width = get_txb_wide(tx_size); - const int height = get_txb_high(tx_size); - const int is_inter = is_inter_block(mbmi); - const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type); - const LV_MAP_COEFF_COST *txb_costs = &x->coeff_costs[txs_ctx][plane_type]; - const int eob_multi_size = txsize_log2_minus4[tx_size]; - const LV_MAP_EOB_COST txb_eob_costs = - x->eob_costs[eob_multi_size][plane_type]; - - const int shift = av1_get_tx_scale(tx_size); - const int64_t rdmult = - ((x->rdmult * plane_rd_mult[is_inter][plane_type] << (2 * (xd->bd - 8))) + - 2) >> - 2; - uint8_t levels_buf[TX_PAD_2D]; - uint8_t *const levels = set_levels(levels_buf, width); - const TX_SIZE qm_tx_size = av1_get_adjusted_tx_size(tx_size); - const qm_val_t *iqmatrix = - IS_2D_TRANSFORM(tx_type) - ? pd->seg_iqmatrix[mbmi->segment_id][qm_tx_size] - : cm->giqmatrix[NUM_QM_LEVELS - 1][0][qm_tx_size]; - assert(width == (1 << bwl)); - const int tx_type_cost = get_tx_type_cost(cm, x, xd, plane, tx_size, tx_type); - TxbInfo txb_info = { - qcoeff, levels, dqcoeff, tcoeff, dequant, shift, - tx_size, txs_ctx, tx_type, bwl, width, height, - eob, seg_eob, scan_order, txb_ctx, rdmult, &cm->coeff_ctx_table, - iqmatrix, tx_type_cost, - }; - - // Hash based trellis (hbt) speed feature: avoid expensive optimize_txb calls - // by storing the coefficient deltas in a hash table. 
- // Currently disabled in speedfeatures.c - if (eob <= HBT_EOB && eob > 0 && cpi->sf.use_hash_based_trellis) { - return hbt_create_hashes(&txb_info, txb_costs, &txb_eob_costs, p, block, - fast_mode, rate_cost); - } - - av1_txb_init_levels(qcoeff, width, height, levels); - - const int update = - optimize_txb(&txb_info, txb_costs, &txb_eob_costs, rate_cost); - - if (update) { - p->eobs[block] = txb_info.eob; - p->txb_entropy_ctx[block] = - av1_get_txb_entropy_context(qcoeff, scan_order, txb_info.eob); - } - return txb_info.eob; -} - -int av1_get_txb_entropy_context(const tran_low_t *qcoeff, - const SCAN_ORDER *scan_order, int eob) { - const int16_t *const scan = scan_order->scan; - int cul_level = 0; - int c; - - if (eob == 0) return 0; - for (c = 0; c < eob; ++c) { - cul_level += abs(qcoeff[scan[c]]); - if (cul_level > COEFF_CONTEXT_MASK) break; - } - - cul_level = AOMMIN(COEFF_CONTEXT_MASK, cul_level); - set_dc_sign(&cul_level, qcoeff[0]); - - return cul_level; -} - -void av1_update_txb_context_b(int plane, int block, int blk_row, int blk_col, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size, - void *arg) { - struct tokenize_b_args *const args = arg; - const AV1_COMP *cpi = args->cpi; - const AV1_COMMON *cm = &cpi->common; - ThreadData *const td = args->td; - MACROBLOCK *const x = &td->mb; - MACROBLOCKD *const xd = &x->e_mbd; - struct macroblock_plane *p = &x->plane[plane]; - struct macroblockd_plane *pd = &xd->plane[plane]; - const uint16_t eob = p->eobs[block]; - const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); - const PLANE_TYPE plane_type = pd->plane_type; - const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col, - tx_size, cm->reduced_tx_set_used); - const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type); - const int cul_level = av1_get_txb_entropy_context(qcoeff, scan_order, eob); - av1_set_contexts(xd, pd, plane, plane_bsize, tx_size, cul_level, blk_col, - blk_row); -} - -static void update_tx_type_count(const AV1_COMMON *cm, 
MACROBLOCKD *xd, - int blk_row, int blk_col, int plane, - TX_SIZE tx_size, FRAME_COUNTS *counts, - uint8_t allow_update_cdf) { - MB_MODE_INFO *mbmi = xd->mi[0]; - int is_inter = is_inter_block(mbmi); - FRAME_CONTEXT *fc = xd->tile_ctx; -#if !CONFIG_ENTROPY_STATS - (void)counts; -#endif // !CONFIG_ENTROPY_STATS - - // Only y plane's tx_type is updated - if (plane > 0) return; - TX_TYPE tx_type = av1_get_tx_type(PLANE_TYPE_Y, xd, blk_row, blk_col, tx_size, - cm->reduced_tx_set_used); - if (get_ext_tx_types(tx_size, is_inter, cm->reduced_tx_set_used) > 1 && - cm->base_qindex > 0 && !mbmi->skip && - !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { - const int eset = get_ext_tx_set(tx_size, is_inter, cm->reduced_tx_set_used); - if (eset > 0) { - const TxSetType tx_set_type = - av1_get_ext_tx_set_type(tx_size, is_inter, cm->reduced_tx_set_used); - if (is_inter) { - if (allow_update_cdf) { - update_cdf(fc->inter_ext_tx_cdf[eset][txsize_sqr_map[tx_size]], - av1_ext_tx_ind[tx_set_type][tx_type], - av1_num_ext_tx_set[tx_set_type]); - } -#if CONFIG_ENTROPY_STATS - ++counts->inter_ext_tx[eset][txsize_sqr_map[tx_size]] - [av1_ext_tx_ind[tx_set_type][tx_type]]; -#endif // CONFIG_ENTROPY_STATS - } else { - PREDICTION_MODE intra_dir; - if (mbmi->filter_intra_mode_info.use_filter_intra) - intra_dir = fimode_to_intradir[mbmi->filter_intra_mode_info - .filter_intra_mode]; - else - intra_dir = mbmi->mode; -#if CONFIG_ENTROPY_STATS - ++counts->intra_ext_tx[eset][txsize_sqr_map[tx_size]][intra_dir] - [av1_ext_tx_ind[tx_set_type][tx_type]]; -#endif // CONFIG_ENTROPY_STATS - if (allow_update_cdf) { - update_cdf( - fc->intra_ext_tx_cdf[eset][txsize_sqr_map[tx_size]][intra_dir], - av1_ext_tx_ind[tx_set_type][tx_type], - av1_num_ext_tx_set[tx_set_type]); - } - } - } - } -} - -void av1_update_and_record_txb_context(int plane, int block, int blk_row, - int blk_col, BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, void *arg) { - struct tokenize_b_args *const args = arg; - const 
AV1_COMP *cpi = args->cpi; - const AV1_COMMON *cm = &cpi->common; - ThreadData *const td = args->td; - MACROBLOCK *const x = &td->mb; - MACROBLOCKD *const xd = &x->e_mbd; - struct macroblock_plane *p = &x->plane[plane]; - struct macroblockd_plane *pd = &xd->plane[plane]; - MB_MODE_INFO *mbmi = xd->mi[0]; - const int eob = p->eobs[block]; - TXB_CTX txb_ctx; - get_txb_ctx(plane_bsize, tx_size, plane, pd->above_context + blk_col, - pd->left_context + blk_row, &txb_ctx); - const int bwl = get_txb_bwl(tx_size); - const int width = get_txb_wide(tx_size); - const int height = get_txb_high(tx_size); - const uint8_t allow_update_cdf = args->allow_update_cdf; - const TX_SIZE txsize_ctx = get_txsize_entropy_ctx(tx_size); - FRAME_CONTEXT *ec_ctx = xd->tile_ctx; -#if CONFIG_ENTROPY_STATS - int cdf_idx = cm->coef_cdf_category; -#endif // CONFIG_ENTROPY_STATS - -#if CONFIG_ENTROPY_STATS - ++td->counts->txb_skip[cdf_idx][txsize_ctx][txb_ctx.txb_skip_ctx][eob == 0]; -#endif // CONFIG_ENTROPY_STATS - if (allow_update_cdf) { - update_cdf(ec_ctx->txb_skip_cdf[txsize_ctx][txb_ctx.txb_skip_ctx], eob == 0, - 2); - } - - x->mbmi_ext->txb_skip_ctx[plane][block] = txb_ctx.txb_skip_ctx; - x->mbmi_ext->eobs[plane][block] = eob; - - if (eob == 0) { - av1_set_contexts(xd, pd, plane, plane_bsize, tx_size, 0, blk_col, blk_row); - return; - } - - tran_low_t *tcoeff = BLOCK_OFFSET(x->mbmi_ext->tcoeff[plane], block); - const int segment_id = mbmi->segment_id; - const int seg_eob = av1_get_tx_eob(&cpi->common.seg, segment_id, tx_size); - const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); - memcpy(tcoeff, qcoeff, sizeof(*tcoeff) * seg_eob); - - uint8_t levels_buf[TX_PAD_2D]; - uint8_t *const levels = set_levels(levels_buf, width); - av1_txb_init_levels(tcoeff, width, height, levels); - update_tx_type_count(cm, xd, blk_row, blk_col, plane, tx_size, td->counts, - allow_update_cdf); - - const PLANE_TYPE plane_type = pd->plane_type; - const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, 
blk_row, blk_col, - tx_size, cm->reduced_tx_set_used); - const TX_CLASS tx_class = tx_type_to_class[tx_type]; - const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type); - const int16_t *const scan = scan_order->scan; -#if CONFIG_ENTROPY_STATS - av1_update_eob_context(cdf_idx, eob, tx_size, tx_class, plane_type, ec_ctx, - td->counts, allow_update_cdf); -#else - av1_update_eob_context(eob, tx_size, tx_class, plane_type, ec_ctx, - allow_update_cdf); -#endif - - DECLARE_ALIGNED(16, int8_t, coeff_contexts[MAX_TX_SQUARE]); - av1_get_nz_map_contexts(levels, scan, eob, tx_size, tx_class, coeff_contexts); - - for (int c = eob - 1; c >= 0; --c) { - const int pos = scan[c]; - const int coeff_ctx = coeff_contexts[pos]; - const tran_low_t v = qcoeff[pos]; - const tran_low_t level = abs(v); - - if (allow_update_cdf) { - if (c == eob - 1) { - assert(coeff_ctx < 4); - update_cdf( - ec_ctx->coeff_base_eob_cdf[txsize_ctx][plane_type][coeff_ctx], - AOMMIN(level, 3) - 1, 3); - } else { - update_cdf(ec_ctx->coeff_base_cdf[txsize_ctx][plane_type][coeff_ctx], - AOMMIN(level, 3), 4); - } - } - { - if (c == eob - 1) { - assert(coeff_ctx < 4); -#if CONFIG_ENTROPY_STATS - ++td->counts->coeff_base_eob_multi[cdf_idx][txsize_ctx][plane_type] - [coeff_ctx][AOMMIN(level, 3) - 1]; - } else { - ++td->counts->coeff_base_multi[cdf_idx][txsize_ctx][plane_type] - [coeff_ctx][AOMMIN(level, 3)]; -#endif - } - } - if (level > NUM_BASE_LEVELS) { - const int base_range = level - 1 - NUM_BASE_LEVELS; - const int br_ctx = get_br_ctx(levels, pos, bwl, tx_class); - for (int idx = 0; idx < COEFF_BASE_RANGE; idx += BR_CDF_SIZE - 1) { - const int k = AOMMIN(base_range - idx, BR_CDF_SIZE - 1); - if (allow_update_cdf) { - update_cdf(ec_ctx->coeff_br_cdf[AOMMIN(txsize_ctx, TX_32X32)] - [plane_type][br_ctx], - k, BR_CDF_SIZE); - } - for (int lps = 0; lps < BR_CDF_SIZE - 1; lps++) { -#if CONFIG_ENTROPY_STATS - ++td->counts->coeff_lps[AOMMIN(txsize_ctx, TX_32X32)][plane_type][lps] - [br_ctx][lps == k]; -#endif 
// CONFIG_ENTROPY_STATS - if (lps == k) break; - } -#if CONFIG_ENTROPY_STATS - ++td->counts->coeff_lps_multi[cdf_idx][AOMMIN(txsize_ctx, TX_32X32)] - [plane_type][br_ctx][k]; -#endif - if (k < BR_CDF_SIZE - 1) break; - } - } - } - - // Update the context needed to code the DC sign (if applicable) - if (tcoeff[0] != 0) { - const int dc_sign = (tcoeff[0] < 0) ? 1 : 0; - const int dc_sign_ctx = txb_ctx.dc_sign_ctx; -#if CONFIG_ENTROPY_STATS - ++td->counts->dc_sign[plane_type][dc_sign_ctx][dc_sign]; -#endif // CONFIG_ENTROPY_STATS - if (allow_update_cdf) - update_cdf(ec_ctx->dc_sign_cdf[plane_type][dc_sign_ctx], dc_sign, 2); - x->mbmi_ext->dc_sign_ctx[plane][block] = dc_sign_ctx; - } - - const int cul_level = av1_get_txb_entropy_context(tcoeff, scan_order, eob); - av1_set_contexts(xd, pd, plane, plane_bsize, tx_size, cul_level, blk_col, - blk_row); -} - -void av1_update_txb_context(const AV1_COMP *cpi, ThreadData *td, - RUN_TYPE dry_run, BLOCK_SIZE bsize, int *rate, - int mi_row, int mi_col, uint8_t allow_update_cdf) { - const AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - MACROBLOCK *const x = &td->mb; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - struct tokenize_b_args arg = { cpi, td, NULL, 0, allow_update_cdf }; - (void)rate; - (void)mi_row; - (void)mi_col; - if (mbmi->skip) { - av1_reset_skip_context(xd, mi_row, mi_col, bsize, num_planes); - return; - } - - if (!dry_run) { - av1_foreach_transformed_block(xd, bsize, mi_row, mi_col, - av1_update_and_record_txb_context, &arg, - num_planes); - } else if (dry_run == DRY_RUN_NORMAL) { - av1_foreach_transformed_block(xd, bsize, mi_row, mi_col, - av1_update_txb_context_b, &arg, num_planes); - } else { - printf("DRY_RUN_COSTCOEFFS is not supported yet\n"); - assert(0); - } -} diff --git a/third_party/aom/av1/encoder/encodetxb.h b/third_party/aom/av1/encoder/encodetxb.h deleted file mode 100644 index 40ae343b0..000000000 --- 
a/third_party/aom/av1/encoder/encodetxb.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2017, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_ENCODER_ENCODETXB_H_ -#define AOM_AV1_ENCODER_ENCODETXB_H_ - -#include "config/aom_config.h" - -#include "av1/common/blockd.h" -#include "av1/common/onyxc_int.h" -#include "av1/common/txb_common.h" -#include "av1/encoder/block.h" -#include "av1/encoder/encoder.h" -#include "aom_dsp/bitwriter.h" -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct TxbInfo { - tran_low_t *qcoeff; - uint8_t *levels; // absolute values and clamped to 255. 
- tran_low_t *dqcoeff; - const tran_low_t *tcoeff; - const int16_t *dequant; - int shift; - TX_SIZE tx_size; - TX_SIZE txs_ctx; - TX_TYPE tx_type; - int bwl; - int width; - int height; - int eob; - int seg_eob; - const SCAN_ORDER *scan_order; - TXB_CTX *txb_ctx; - int64_t rdmult; - const LV_MAP_CTX_TABLE *coeff_ctx_table; - const qm_val_t *iqmatrix; - int tx_type_cost; -} TxbInfo; - -void av1_alloc_txb_buf(AV1_COMP *cpi); -void av1_free_txb_buf(AV1_COMP *cpi); -int av1_cost_coeffs_txb(const AV1_COMMON *const cm, const MACROBLOCK *x, - const int plane, const int block, const TX_SIZE tx_size, - const TX_TYPE tx_type, const TXB_CTX *const txb_ctx); -void av1_write_coeffs_txb(const AV1_COMMON *const cm, MACROBLOCKD *xd, - aom_writer *w, int blk_row, int blk_col, int plane, - TX_SIZE tx_size, const tran_low_t *tcoeff, - uint16_t eob, TXB_CTX *txb_ctx); -void av1_write_coeffs_mb(const AV1_COMMON *const cm, MACROBLOCK *x, int mi_row, - int mi_col, aom_writer *w, BLOCK_SIZE bsize); -int av1_get_txb_entropy_context(const tran_low_t *qcoeff, - const SCAN_ORDER *scan_order, int eob); -void av1_update_txb_context(const AV1_COMP *cpi, ThreadData *td, - RUN_TYPE dry_run, BLOCK_SIZE bsize, int *rate, - int mi_row, int mi_col, uint8_t allow_update_cdf); - -void av1_update_txb_context_b(int plane, int block, int blk_row, int blk_col, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size, - void *arg); - -void av1_update_and_record_txb_context(int plane, int block, int blk_row, - int blk_col, BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, void *arg); - -void av1_set_coeff_buffer(const AV1_COMP *const cpi, MACROBLOCK *const x, - int mi_row, int mi_col); - -void hbt_destroy(); -int av1_optimize_txb_new(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane, - int block, TX_SIZE tx_size, TX_TYPE tx_type, - const TXB_CTX *const txb_ctx, int *rate_cost, - int sharpness); -#ifdef __cplusplus -} -#endif - -#endif // AOM_AV1_ENCODER_ENCODETXB_H_ diff --git a/third_party/aom/av1/encoder/ethread.c 
b/third_party/aom/av1/encoder/ethread.c deleted file mode 100644 index e8ac30bb5..000000000 --- a/third_party/aom/av1/encoder/ethread.c +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include "av1/encoder/encodeframe.h" -#include "av1/encoder/encoder.h" -#include "av1/encoder/ethread.h" -#include "aom_dsp/aom_dsp_common.h" - -static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) { - for (int i = 0; i < REFERENCE_MODES; i++) - td->rd_counts.comp_pred_diff[i] += td_t->rd_counts.comp_pred_diff[i]; - - for (int i = 0; i < REF_FRAMES; i++) - td->rd_counts.global_motion_used[i] += - td_t->rd_counts.global_motion_used[i]; - - td->rd_counts.compound_ref_used_flag |= - td_t->rd_counts.compound_ref_used_flag; - td->rd_counts.skip_mode_used_flag |= td_t->rd_counts.skip_mode_used_flag; -} - -static int enc_worker_hook(void *arg1, void *unused) { - EncWorkerData *const thread_data = (EncWorkerData *)arg1; - AV1_COMP *const cpi = thread_data->cpi; - const AV1_COMMON *const cm = &cpi->common; - const int tile_cols = cm->tile_cols; - const int tile_rows = cm->tile_rows; - int t; - - (void)unused; - - for (t = thread_data->start; t < tile_rows * tile_cols; - t += cpi->num_workers) { - int tile_row = t / tile_cols; - int tile_col = t % tile_cols; - - av1_encode_tile(cpi, thread_data->td, tile_row, tile_col); - } - - return 1; -} - -static void create_enc_workers(AV1_COMP *cpi, int num_workers) { - AV1_COMMON *const cm = &cpi->common; - const 
AVxWorkerInterface *const winterface = aom_get_worker_interface(); - - CHECK_MEM_ERROR(cm, cpi->workers, - aom_malloc(num_workers * sizeof(*cpi->workers))); - - CHECK_MEM_ERROR(cm, cpi->tile_thr_data, - aom_calloc(num_workers, sizeof(*cpi->tile_thr_data))); - - for (int i = 0; i < num_workers; i++) { - AVxWorker *const worker = &cpi->workers[i]; - EncWorkerData *const thread_data = &cpi->tile_thr_data[i]; - - ++cpi->num_workers; - winterface->init(worker); - - thread_data->cpi = cpi; - - if (i < num_workers - 1) { - // Allocate thread data. - CHECK_MEM_ERROR(cm, thread_data->td, - aom_memalign(32, sizeof(*thread_data->td))); - av1_zero(*thread_data->td); - - // Set up pc_tree. - thread_data->td->pc_tree = NULL; - av1_setup_pc_tree(cm, thread_data->td); - - CHECK_MEM_ERROR(cm, thread_data->td->above_pred_buf, - (uint8_t *)aom_memalign( - 16, MAX_MB_PLANE * MAX_SB_SQUARE * - sizeof(*thread_data->td->above_pred_buf))); - CHECK_MEM_ERROR(cm, thread_data->td->left_pred_buf, - (uint8_t *)aom_memalign( - 16, MAX_MB_PLANE * MAX_SB_SQUARE * - sizeof(*thread_data->td->left_pred_buf))); - - CHECK_MEM_ERROR( - cm, thread_data->td->wsrc_buf, - (int32_t *)aom_memalign( - 16, MAX_SB_SQUARE * sizeof(*thread_data->td->wsrc_buf))); - - for (int x = 0; x < 2; x++) - for (int y = 0; y < 2; y++) - CHECK_MEM_ERROR( - cm, thread_data->td->hash_value_buffer[x][y], - (uint32_t *)aom_malloc( - AOM_BUFFER_SIZE_FOR_BLOCK_HASH * - sizeof(*thread_data->td->hash_value_buffer[0][0]))); - - CHECK_MEM_ERROR( - cm, thread_data->td->mask_buf, - (int32_t *)aom_memalign( - 16, MAX_SB_SQUARE * sizeof(*thread_data->td->mask_buf))); - // Allocate frame counters in thread data. - CHECK_MEM_ERROR(cm, thread_data->td->counts, - aom_calloc(1, sizeof(*thread_data->td->counts))); - - // Allocate buffers used by palette coding mode. 
- CHECK_MEM_ERROR( - cm, thread_data->td->palette_buffer, - aom_memalign(16, sizeof(*thread_data->td->palette_buffer))); - - CHECK_MEM_ERROR( - cm, thread_data->td->tmp_conv_dst, - aom_memalign(32, MAX_SB_SIZE * MAX_SB_SIZE * - sizeof(*thread_data->td->tmp_conv_dst))); - for (int j = 0; j < 2; ++j) { - CHECK_MEM_ERROR( - cm, thread_data->td->tmp_obmc_bufs[j], - aom_memalign(16, 2 * MAX_MB_PLANE * MAX_SB_SQUARE * - sizeof(*thread_data->td->tmp_obmc_bufs[j]))); - } - - // Create threads - if (!winterface->reset(worker)) - aom_internal_error(&cm->error, AOM_CODEC_ERROR, - "Tile encoder thread creation failed"); - } else { - // Main thread acts as a worker and uses the thread data in cpi. - thread_data->td = &cpi->td; - } - winterface->sync(worker); - } -} - -static void launch_enc_workers(AV1_COMP *cpi, int num_workers) { - const AVxWorkerInterface *const winterface = aom_get_worker_interface(); - // Encode a frame - for (int i = 0; i < num_workers; i++) { - AVxWorker *const worker = &cpi->workers[i]; - EncWorkerData *const thread_data = (EncWorkerData *)worker->data1; - - // Set the starting tile for each thread. - thread_data->start = i; - - if (i == cpi->num_workers - 1) - winterface->execute(worker); - else - winterface->launch(worker); - } -} - -static void sync_enc_workers(AV1_COMP *cpi, int num_workers) { - const AVxWorkerInterface *const winterface = aom_get_worker_interface(); - - // Encoding ends. - for (int i = 0; i < num_workers; i++) { - AVxWorker *const worker = &cpi->workers[i]; - winterface->sync(worker); - } -} - -static void accumulate_counters_enc_workers(AV1_COMP *cpi, int num_workers) { - for (int i = 0; i < num_workers; i++) { - AVxWorker *const worker = &cpi->workers[i]; - EncWorkerData *const thread_data = (EncWorkerData *)worker->data1; - cpi->intrabc_used |= thread_data->td->intrabc_used_this_tile; - // Accumulate counters. 
- if (i < cpi->num_workers - 1) { - av1_accumulate_frame_counts(&cpi->counts, thread_data->td->counts); - accumulate_rd_opt(&cpi->td, thread_data->td); - cpi->td.mb.txb_split_count += thread_data->td->mb.txb_split_count; - } - } -} - -static void prepare_enc_workers(AV1_COMP *cpi, AVxWorkerHook hook, - int num_workers) { - for (int i = 0; i < num_workers; i++) { - AVxWorker *const worker = &cpi->workers[i]; - EncWorkerData *const thread_data = &cpi->tile_thr_data[i]; - - worker->hook = hook; - worker->data1 = thread_data; - worker->data2 = NULL; - - // Before encoding a frame, copy the thread data from cpi. - if (thread_data->td != &cpi->td) { - thread_data->td->mb = cpi->td.mb; - thread_data->td->rd_counts = cpi->td.rd_counts; - thread_data->td->mb.above_pred_buf = thread_data->td->above_pred_buf; - thread_data->td->mb.left_pred_buf = thread_data->td->left_pred_buf; - thread_data->td->mb.wsrc_buf = thread_data->td->wsrc_buf; - for (int x = 0; x < 2; x++) { - for (int y = 0; y < 2; y++) { - memcpy(thread_data->td->hash_value_buffer[x][y], - cpi->td.mb.hash_value_buffer[x][y], - AOM_BUFFER_SIZE_FOR_BLOCK_HASH * - sizeof(*thread_data->td->hash_value_buffer[0][0])); - thread_data->td->mb.hash_value_buffer[x][y] = - thread_data->td->hash_value_buffer[x][y]; - } - } - thread_data->td->mb.mask_buf = thread_data->td->mask_buf; - } - if (thread_data->td->counts != &cpi->counts) { - memcpy(thread_data->td->counts, &cpi->counts, sizeof(cpi->counts)); - } - - if (i < num_workers - 1) { - thread_data->td->mb.palette_buffer = thread_data->td->palette_buffer; - thread_data->td->mb.tmp_conv_dst = thread_data->td->tmp_conv_dst; - for (int j = 0; j < 2; ++j) { - thread_data->td->mb.tmp_obmc_bufs[j] = - thread_data->td->tmp_obmc_bufs[j]; - } - - thread_data->td->mb.e_mbd.tmp_conv_dst = thread_data->td->mb.tmp_conv_dst; - for (int j = 0; j < 2; ++j) { - thread_data->td->mb.e_mbd.tmp_obmc_bufs[j] = - thread_data->td->mb.tmp_obmc_bufs[j]; - } - } - } -} - -void 
av1_encode_tiles_mt(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - const int tile_cols = cm->tile_cols; - const int tile_rows = cm->tile_rows; - int num_workers = AOMMIN(cpi->oxcf.max_threads, tile_cols * tile_rows); - - if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) - av1_alloc_tile_data(cpi); - - av1_init_tile_data(cpi); - // Only run once to create threads and allocate thread data. - if (cpi->num_workers == 0) { - create_enc_workers(cpi, num_workers); - } else { - num_workers = AOMMIN(num_workers, cpi->num_workers); - } - prepare_enc_workers(cpi, enc_worker_hook, num_workers); - launch_enc_workers(cpi, num_workers); - sync_enc_workers(cpi, num_workers); - accumulate_counters_enc_workers(cpi, num_workers); -} - -// Accumulate frame counts. FRAME_COUNTS consist solely of 'unsigned int' -// members, so we treat it as an array, and sum over the whole length. -void av1_accumulate_frame_counts(FRAME_COUNTS *acc_counts, - const FRAME_COUNTS *counts) { - unsigned int *const acc = (unsigned int *)acc_counts; - const unsigned int *const cnt = (const unsigned int *)counts; - - const unsigned int n_counts = sizeof(FRAME_COUNTS) / sizeof(unsigned int); - - for (unsigned int i = 0; i < n_counts; i++) acc[i] += cnt[i]; -} diff --git a/third_party/aom/av1/encoder/ethread.h b/third_party/aom/av1/encoder/ethread.h deleted file mode 100644 index 5de4b4803..000000000 --- a/third_party/aom/av1/encoder/ethread.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_ENCODER_ETHREAD_H_ -#define AOM_AV1_ENCODER_ETHREAD_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -struct AV1_COMP; -struct ThreadData; - -typedef struct EncWorkerData { - struct AV1_COMP *cpi; - struct ThreadData *td; - int start; -} EncWorkerData; - -void av1_encode_tiles_mt(struct AV1_COMP *cpi); - -void av1_accumulate_frame_counts(struct FRAME_COUNTS *acc_counts, - const struct FRAME_COUNTS *counts); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_ETHREAD_H_ diff --git a/third_party/aom/av1/encoder/extend.c b/third_party/aom/av1/encoder/extend.c deleted file mode 100644 index e9621a574..000000000 --- a/third_party/aom/av1/encoder/extend.c +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include "aom_dsp/aom_dsp_common.h" -#include "aom_mem/aom_mem.h" -#include "aom_ports/mem.h" - -#include "av1/common/common.h" -#include "av1/encoder/extend.h" - -static void copy_and_extend_plane(const uint8_t *src, int src_pitch, - uint8_t *dst, int dst_pitch, int w, int h, - int extend_top, int extend_left, - int extend_bottom, int extend_right) { - int i, linesize; - - // copy the left and right most columns out - const uint8_t *src_ptr1 = src; - const uint8_t *src_ptr2 = src + w - 1; - uint8_t *dst_ptr1 = dst - extend_left; - uint8_t *dst_ptr2 = dst + w; - - for (i = 0; i < h; i++) { - memset(dst_ptr1, src_ptr1[0], extend_left); - memcpy(dst_ptr1 + extend_left, src_ptr1, w); - memset(dst_ptr2, src_ptr2[0], extend_right); - src_ptr1 += src_pitch; - src_ptr2 += src_pitch; - dst_ptr1 += dst_pitch; - dst_ptr2 += dst_pitch; - } - - // Now copy the top and bottom lines into each line of the respective - // borders - src_ptr1 = dst - extend_left; - src_ptr2 = dst + dst_pitch * (h - 1) - extend_left; - dst_ptr1 = dst + dst_pitch * (-extend_top) - extend_left; - dst_ptr2 = dst + dst_pitch * (h)-extend_left; - linesize = extend_left + extend_right + w; - - for (i = 0; i < extend_top; i++) { - memcpy(dst_ptr1, src_ptr1, linesize); - dst_ptr1 += dst_pitch; - } - - for (i = 0; i < extend_bottom; i++) { - memcpy(dst_ptr2, src_ptr2, linesize); - dst_ptr2 += dst_pitch; - } -} - -static void highbd_copy_and_extend_plane(const uint8_t *src8, int src_pitch, - uint8_t *dst8, int dst_pitch, int w, - int h, int extend_top, int extend_left, - int extend_bottom, int extend_right) { - int i, linesize; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); - - // copy the left and right most columns out - const uint16_t *src_ptr1 = src; - const uint16_t *src_ptr2 = src + w - 1; - uint16_t *dst_ptr1 = dst - extend_left; - uint16_t *dst_ptr2 = dst + w; - - for (i = 0; i < h; i++) { - aom_memset16(dst_ptr1, src_ptr1[0], extend_left); - 
memcpy(dst_ptr1 + extend_left, src_ptr1, w * sizeof(src_ptr1[0])); - aom_memset16(dst_ptr2, src_ptr2[0], extend_right); - src_ptr1 += src_pitch; - src_ptr2 += src_pitch; - dst_ptr1 += dst_pitch; - dst_ptr2 += dst_pitch; - } - - // Now copy the top and bottom lines into each line of the respective - // borders - src_ptr1 = dst - extend_left; - src_ptr2 = dst + dst_pitch * (h - 1) - extend_left; - dst_ptr1 = dst + dst_pitch * (-extend_top) - extend_left; - dst_ptr2 = dst + dst_pitch * (h)-extend_left; - linesize = extend_left + extend_right + w; - - for (i = 0; i < extend_top; i++) { - memcpy(dst_ptr1, src_ptr1, linesize * sizeof(src_ptr1[0])); - dst_ptr1 += dst_pitch; - } - - for (i = 0; i < extend_bottom; i++) { - memcpy(dst_ptr2, src_ptr2, linesize * sizeof(src_ptr2[0])); - dst_ptr2 += dst_pitch; - } -} - -void av1_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst) { - // Extend src frame in buffer - // Altref filtering assumes 16 pixel extension - const int et_y = 16; - const int el_y = 16; - // Motion estimation may use src block variance with the block size up - // to 64x64, so the right and bottom need to be extended to 64 multiple - // or up to 16, whichever is greater. 
- const int er_y = - AOMMAX(src->y_width + 16, ALIGN_POWER_OF_TWO(src->y_width, 6)) - - src->y_crop_width; - const int eb_y = - AOMMAX(src->y_height + 16, ALIGN_POWER_OF_TWO(src->y_height, 6)) - - src->y_crop_height; - const int uv_width_subsampling = (src->uv_width != src->y_width); - const int uv_height_subsampling = (src->uv_height != src->y_height); - const int et_uv = et_y >> uv_height_subsampling; - const int el_uv = el_y >> uv_width_subsampling; - const int eb_uv = eb_y >> uv_height_subsampling; - const int er_uv = er_y >> uv_width_subsampling; - - if (src->flags & YV12_FLAG_HIGHBITDEPTH) { - highbd_copy_and_extend_plane(src->y_buffer, src->y_stride, dst->y_buffer, - dst->y_stride, src->y_crop_width, - src->y_crop_height, et_y, el_y, eb_y, er_y); - - highbd_copy_and_extend_plane( - src->u_buffer, src->uv_stride, dst->u_buffer, dst->uv_stride, - src->uv_crop_width, src->uv_crop_height, et_uv, el_uv, eb_uv, er_uv); - - highbd_copy_and_extend_plane( - src->v_buffer, src->uv_stride, dst->v_buffer, dst->uv_stride, - src->uv_crop_width, src->uv_crop_height, et_uv, el_uv, eb_uv, er_uv); - return; - } - - copy_and_extend_plane(src->y_buffer, src->y_stride, dst->y_buffer, - dst->y_stride, src->y_crop_width, src->y_crop_height, - et_y, el_y, eb_y, er_y); - - copy_and_extend_plane(src->u_buffer, src->uv_stride, dst->u_buffer, - dst->uv_stride, src->uv_crop_width, src->uv_crop_height, - et_uv, el_uv, eb_uv, er_uv); - - copy_and_extend_plane(src->v_buffer, src->uv_stride, dst->v_buffer, - dst->uv_stride, src->uv_crop_width, src->uv_crop_height, - et_uv, el_uv, eb_uv, er_uv); -} - -void av1_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst, int srcy, - int srcx, int srch, int srcw) { - // If the side is not touching the bounder then don't extend. - const int et_y = srcy ? 0 : dst->border; - const int el_y = srcx ? 0 : dst->border; - const int eb_y = srcy + srch != src->y_height - ? 
0 - : dst->border + dst->y_height - src->y_height; - const int er_y = srcx + srcw != src->y_width - ? 0 - : dst->border + dst->y_width - src->y_width; - const int src_y_offset = srcy * src->y_stride + srcx; - const int dst_y_offset = srcy * dst->y_stride + srcx; - - const int et_uv = ROUND_POWER_OF_TWO(et_y, 1); - const int el_uv = ROUND_POWER_OF_TWO(el_y, 1); - const int eb_uv = ROUND_POWER_OF_TWO(eb_y, 1); - const int er_uv = ROUND_POWER_OF_TWO(er_y, 1); - const int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1); - const int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1); - const int srch_uv = ROUND_POWER_OF_TWO(srch, 1); - const int srcw_uv = ROUND_POWER_OF_TWO(srcw, 1); - - copy_and_extend_plane(src->y_buffer + src_y_offset, src->y_stride, - dst->y_buffer + dst_y_offset, dst->y_stride, srcw, srch, - et_y, el_y, eb_y, er_y); - - copy_and_extend_plane(src->u_buffer + src_uv_offset, src->uv_stride, - dst->u_buffer + dst_uv_offset, dst->uv_stride, srcw_uv, - srch_uv, et_uv, el_uv, eb_uv, er_uv); - - copy_and_extend_plane(src->v_buffer + src_uv_offset, src->uv_stride, - dst->v_buffer + dst_uv_offset, dst->uv_stride, srcw_uv, - srch_uv, et_uv, el_uv, eb_uv, er_uv); -} diff --git a/third_party/aom/av1/encoder/extend.h b/third_party/aom/av1/encoder/extend.h deleted file mode 100644 index e0432cc97..000000000 --- a/third_party/aom/av1/encoder/extend.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_ENCODER_EXTEND_H_ -#define AOM_AV1_ENCODER_EXTEND_H_ - -#include "aom_scale/yv12config.h" -#include "aom/aom_integer.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void av1_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst); - -void av1_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst, int srcy, - int srcx, int srch, int srcw); -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_EXTEND_H_ diff --git a/third_party/aom/av1/encoder/firstpass.c b/third_party/aom/av1/encoder/firstpass.c deleted file mode 100644 index 69dd20c52..000000000 --- a/third_party/aom/av1/encoder/firstpass.c +++ /dev/null @@ -1,3480 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include -#include -#include - -#include "config/aom_dsp_rtcd.h" -#include "config/aom_scale_rtcd.h" - -#include "aom_dsp/aom_dsp_common.h" -#include "aom_mem/aom_mem.h" -#include "aom_ports/mem.h" -#include "aom_ports/system_state.h" -#include "aom_scale/aom_scale.h" -#include "aom_scale/yv12config.h" - -#include "aom_dsp/variance.h" -#include "av1/common/entropymv.h" -#include "av1/common/quant_common.h" -#include "av1/common/reconinter.h" // av1_setup_dst_planes() -#include "av1/common/txb_common.h" -#include "av1/encoder/aq_variance.h" -#include "av1/encoder/av1_quantize.h" -#include "av1/encoder/block.h" -#include "av1/encoder/dwt.h" -#include "av1/encoder/encodeframe.h" -#include "av1/encoder/encodemb.h" -#include "av1/encoder/encodemv.h" -#include "av1/encoder/encoder.h" -#include "av1/encoder/extend.h" -#include "av1/encoder/firstpass.h" -#include "av1/encoder/mcomp.h" -#include "av1/encoder/rd.h" -#include "av1/encoder/reconinter_enc.h" - -#define OUTPUT_FPF 0 -#define ARF_STATS_OUTPUT 0 - -#define GROUP_ADAPTIVE_MAXQ 1 - -#define BOOST_BREAKOUT 12.5 -#define BOOST_FACTOR 12.5 -#define FACTOR_PT_LOW 0.70 -#define FACTOR_PT_HIGH 0.90 -#define FIRST_PASS_Q 10.0 -#define GF_MAX_BOOST 90.0 -#define INTRA_MODE_PENALTY 1024 -#define KF_MIN_FRAME_BOOST 80.0 -#define KF_MAX_FRAME_BOOST 128.0 -#define MIN_ARF_GF_BOOST 240 -#define MIN_DECAY_FACTOR 0.01 -#define MIN_KF_BOOST 300 -#define NEW_MV_MODE_PENALTY 32 -#define DARK_THRESH 64 -#define DEFAULT_GRP_WEIGHT 1.0 -#define RC_FACTOR_MIN 0.75 -#define RC_FACTOR_MAX 1.75 -#define MIN_FWD_KF_INTERVAL 8 - -#define NCOUNT_INTRA_THRESH 8192 -#define NCOUNT_INTRA_FACTOR 3 -#define NCOUNT_FRAME_II_THRESH 5.0 - -#define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x)-0.000001 : (x) + 0.000001) - -#if ARF_STATS_OUTPUT -unsigned int arf_count = 0; -#endif - -// Resets the first pass file to the given position using a relative seek from -// the current position. 
-static void reset_fpf_position(TWO_PASS *p, const FIRSTPASS_STATS *position) { - p->stats_in = position; -} - -// Read frame stats at an offset from the current position. -static const FIRSTPASS_STATS *read_frame_stats(const TWO_PASS *p, int offset) { - if ((offset >= 0 && p->stats_in + offset >= p->stats_in_end) || - (offset < 0 && p->stats_in + offset < p->stats_in_start)) { - return NULL; - } - - return &p->stats_in[offset]; -} - -static int input_stats(TWO_PASS *p, FIRSTPASS_STATS *fps) { - if (p->stats_in >= p->stats_in_end) return EOF; - - *fps = *p->stats_in; - ++p->stats_in; - return 1; -} - -static void output_stats(FIRSTPASS_STATS *stats, - struct aom_codec_pkt_list *pktlist) { - struct aom_codec_cx_pkt pkt; - pkt.kind = AOM_CODEC_STATS_PKT; - pkt.data.twopass_stats.buf = stats; - pkt.data.twopass_stats.sz = sizeof(FIRSTPASS_STATS); - aom_codec_pkt_list_add(pktlist, &pkt); - -// TEMP debug code -#if OUTPUT_FPF - { - FILE *fpfile; - fpfile = fopen("firstpass.stt", "a"); - - fprintf(fpfile, - "%12.0lf %12.4lf %12.0lf %12.0lf %12.0lf %12.4lf %12.4lf" - "%12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf" - "%12.4lf %12.4lf %12.0lf %12.0lf %12.0lf %12.4lf %12.4lf\n", - stats->frame, stats->weight, stats->intra_error, stats->coded_error, - stats->sr_coded_error, stats->pcnt_inter, stats->pcnt_motion, - stats->pcnt_second_ref, stats->pcnt_neutral, stats->intra_skip_pct, - stats->inactive_zone_rows, stats->inactive_zone_cols, stats->MVr, - stats->mvr_abs, stats->MVc, stats->mvc_abs, stats->MVrv, - stats->MVcv, stats->mv_in_out_count, stats->new_mv_count, - stats->count, stats->duration); - fclose(fpfile); - } -#endif -} - -#if CONFIG_FP_MB_STATS -static void output_fpmb_stats(uint8_t *this_frame_mb_stats, int stats_size, - struct aom_codec_pkt_list *pktlist) { - struct aom_codec_cx_pkt pkt; - pkt.kind = AOM_CODEC_FPMB_STATS_PKT; - pkt.data.firstpass_mb_stats.buf = this_frame_mb_stats; - pkt.data.firstpass_mb_stats.sz = stats_size * 
sizeof(*this_frame_mb_stats); - aom_codec_pkt_list_add(pktlist, &pkt); -} -#endif - -static void zero_stats(FIRSTPASS_STATS *section) { - section->frame = 0.0; - section->weight = 0.0; - section->intra_error = 0.0; - section->frame_avg_wavelet_energy = 0.0; - section->coded_error = 0.0; - section->sr_coded_error = 0.0; - section->pcnt_inter = 0.0; - section->pcnt_motion = 0.0; - section->pcnt_second_ref = 0.0; - section->pcnt_neutral = 0.0; - section->intra_skip_pct = 0.0; - section->inactive_zone_rows = 0.0; - section->inactive_zone_cols = 0.0; - section->MVr = 0.0; - section->mvr_abs = 0.0; - section->MVc = 0.0; - section->mvc_abs = 0.0; - section->MVrv = 0.0; - section->MVcv = 0.0; - section->mv_in_out_count = 0.0; - section->new_mv_count = 0.0; - section->count = 0.0; - section->duration = 1.0; -} - -static void accumulate_stats(FIRSTPASS_STATS *section, - const FIRSTPASS_STATS *frame) { - section->frame += frame->frame; - section->weight += frame->weight; - section->intra_error += frame->intra_error; - section->frame_avg_wavelet_energy += frame->frame_avg_wavelet_energy; - section->coded_error += frame->coded_error; - section->sr_coded_error += frame->sr_coded_error; - section->pcnt_inter += frame->pcnt_inter; - section->pcnt_motion += frame->pcnt_motion; - section->pcnt_second_ref += frame->pcnt_second_ref; - section->pcnt_neutral += frame->pcnt_neutral; - section->intra_skip_pct += frame->intra_skip_pct; - section->inactive_zone_rows += frame->inactive_zone_rows; - section->inactive_zone_cols += frame->inactive_zone_cols; - section->MVr += frame->MVr; - section->mvr_abs += frame->mvr_abs; - section->MVc += frame->MVc; - section->mvc_abs += frame->mvc_abs; - section->MVrv += frame->MVrv; - section->MVcv += frame->MVcv; - section->mv_in_out_count += frame->mv_in_out_count; - section->new_mv_count += frame->new_mv_count; - section->count += frame->count; - section->duration += frame->duration; -} - -static void subtract_stats(FIRSTPASS_STATS *section, - const 
FIRSTPASS_STATS *frame) { - section->frame -= frame->frame; - section->weight -= frame->weight; - section->intra_error -= frame->intra_error; - section->frame_avg_wavelet_energy -= frame->frame_avg_wavelet_energy; - section->coded_error -= frame->coded_error; - section->sr_coded_error -= frame->sr_coded_error; - section->pcnt_inter -= frame->pcnt_inter; - section->pcnt_motion -= frame->pcnt_motion; - section->pcnt_second_ref -= frame->pcnt_second_ref; - section->pcnt_neutral -= frame->pcnt_neutral; - section->intra_skip_pct -= frame->intra_skip_pct; - section->inactive_zone_rows -= frame->inactive_zone_rows; - section->inactive_zone_cols -= frame->inactive_zone_cols; - section->MVr -= frame->MVr; - section->mvr_abs -= frame->mvr_abs; - section->MVc -= frame->MVc; - section->mvc_abs -= frame->mvc_abs; - section->MVrv -= frame->MVrv; - section->MVcv -= frame->MVcv; - section->mv_in_out_count -= frame->mv_in_out_count; - section->new_mv_count -= frame->new_mv_count; - section->count -= frame->count; - section->duration -= frame->duration; -} - -// Calculate the linear size relative to a baseline of 1080P -#define BASE_SIZE 2073600.0 // 1920x1080 -static double get_linear_size_factor(const AV1_COMP *cpi) { - const double this_area = cpi->initial_width * cpi->initial_height; - return pow(this_area / BASE_SIZE, 0.5); -} - -// Calculate an active area of the image that discounts formatting -// bars and partially discounts other 0 energy areas. -#define MIN_ACTIVE_AREA 0.5 -#define MAX_ACTIVE_AREA 1.0 -static double calculate_active_area(const AV1_COMP *cpi, - const FIRSTPASS_STATS *this_frame) { - double active_pct; - - active_pct = - 1.0 - - ((this_frame->intra_skip_pct / 2) + - ((this_frame->inactive_zone_rows * 2) / (double)cpi->common.mb_rows)); - return fclamp(active_pct, MIN_ACTIVE_AREA, MAX_ACTIVE_AREA); -} - -// Calculate a modified Error used in distributing bits between easier and -// harder frames. 
-#define ACT_AREA_CORRECTION 0.5 -static double calculate_modified_err(const AV1_COMP *cpi, - const TWO_PASS *twopass, - const AV1EncoderConfig *oxcf, - const FIRSTPASS_STATS *this_frame) { - const FIRSTPASS_STATS *const stats = &twopass->total_stats; - const double av_weight = stats->weight / stats->count; - const double av_err = (stats->coded_error * av_weight) / stats->count; - double modified_error = - av_err * pow(this_frame->coded_error * this_frame->weight / - DOUBLE_DIVIDE_CHECK(av_err), - oxcf->two_pass_vbrbias / 100.0); - - // Correction for active area. Frames with a reduced active area - // (eg due to formatting bars) have a higher error per mb for the - // remaining active MBs. The correction here assumes that coding - // 0.5N blocks of complexity 2X is a little easier than coding N - // blocks of complexity X. - modified_error *= - pow(calculate_active_area(cpi, this_frame), ACT_AREA_CORRECTION); - - return fclamp(modified_error, twopass->modified_error_min, - twopass->modified_error_max); -} - -// This function returns the maximum target rate per frame. 
-static int frame_max_bits(const RATE_CONTROL *rc, - const AV1EncoderConfig *oxcf) { - int64_t max_bits = ((int64_t)rc->avg_frame_bandwidth * - (int64_t)oxcf->two_pass_vbrmax_section) / - 100; - if (max_bits < 0) - max_bits = 0; - else if (max_bits > rc->max_frame_bandwidth) - max_bits = rc->max_frame_bandwidth; - - return (int)max_bits; -} - -void av1_init_first_pass(AV1_COMP *cpi) { - zero_stats(&cpi->twopass.total_stats); -} - -void av1_end_first_pass(AV1_COMP *cpi) { - output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list); -} - -static aom_variance_fn_t get_block_variance_fn(BLOCK_SIZE bsize) { - switch (bsize) { - case BLOCK_8X8: return aom_mse8x8; - case BLOCK_16X8: return aom_mse16x8; - case BLOCK_8X16: return aom_mse8x16; - default: return aom_mse16x16; - } -} - -static unsigned int get_prediction_error(BLOCK_SIZE bsize, - const struct buf_2d *src, - const struct buf_2d *ref) { - unsigned int sse; - const aom_variance_fn_t fn = get_block_variance_fn(bsize); - fn(src->buf, src->stride, ref->buf, ref->stride, &sse); - return sse; -} - -static aom_variance_fn_t highbd_get_block_variance_fn(BLOCK_SIZE bsize, - int bd) { - switch (bd) { - default: - switch (bsize) { - case BLOCK_8X8: return aom_highbd_8_mse8x8; - case BLOCK_16X8: return aom_highbd_8_mse16x8; - case BLOCK_8X16: return aom_highbd_8_mse8x16; - default: return aom_highbd_8_mse16x16; - } - break; - case 10: - switch (bsize) { - case BLOCK_8X8: return aom_highbd_10_mse8x8; - case BLOCK_16X8: return aom_highbd_10_mse16x8; - case BLOCK_8X16: return aom_highbd_10_mse8x16; - default: return aom_highbd_10_mse16x16; - } - break; - case 12: - switch (bsize) { - case BLOCK_8X8: return aom_highbd_12_mse8x8; - case BLOCK_16X8: return aom_highbd_12_mse16x8; - case BLOCK_8X16: return aom_highbd_12_mse8x16; - default: return aom_highbd_12_mse16x16; - } - break; - } -} - -static unsigned int highbd_get_prediction_error(BLOCK_SIZE bsize, - const struct buf_2d *src, - const struct buf_2d *ref, - int bd) { - 
unsigned int sse; - const aom_variance_fn_t fn = highbd_get_block_variance_fn(bsize, bd); - fn(src->buf, src->stride, ref->buf, ref->stride, &sse); - return sse; -} - -// Refine the motion search range according to the frame dimension -// for first pass test. -static int get_search_range(const AV1_COMP *cpi) { - int sr = 0; - const int dim = AOMMIN(cpi->initial_width, cpi->initial_height); - - while ((dim << sr) < MAX_FULL_PEL_VAL) ++sr; - return sr; -} - -static void first_pass_motion_search(AV1_COMP *cpi, MACROBLOCK *x, - const MV *ref_mv, MV *best_mv, - int *best_motion_err) { - MACROBLOCKD *const xd = &x->e_mbd; - MV tmp_mv = kZeroMv; - MV ref_mv_full = { ref_mv->row >> 3, ref_mv->col >> 3 }; - int num00, tmp_err, n; - const BLOCK_SIZE bsize = xd->mi[0]->sb_type; - aom_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[bsize]; - const int new_mv_mode_penalty = NEW_MV_MODE_PENALTY; - - int step_param = 3; - int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; - const int sr = get_search_range(cpi); - step_param += sr; - further_steps -= sr; - - // Override the default variance function to use MSE. - v_fn_ptr.vf = get_block_variance_fn(bsize); - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - v_fn_ptr.vf = highbd_get_block_variance_fn(bsize, xd->bd); - } - - // Center the initial step/diamond search on best mv. - tmp_err = cpi->diamond_search_sad(x, &cpi->ss_cfg, &ref_mv_full, &tmp_mv, - step_param, x->sadperbit16, &num00, - &v_fn_ptr, ref_mv); - if (tmp_err < INT_MAX) - tmp_err = av1_get_mvpred_var(x, &tmp_mv, ref_mv, &v_fn_ptr, 1); - if (tmp_err < INT_MAX - new_mv_mode_penalty) tmp_err += new_mv_mode_penalty; - - if (tmp_err < *best_motion_err) { - *best_motion_err = tmp_err; - *best_mv = tmp_mv; - } - - // Carry out further step/diamond searches as necessary. 
- n = num00; - num00 = 0; - - while (n < further_steps) { - ++n; - - if (num00) { - --num00; - } else { - tmp_err = cpi->diamond_search_sad(x, &cpi->ss_cfg, &ref_mv_full, &tmp_mv, - step_param + n, x->sadperbit16, &num00, - &v_fn_ptr, ref_mv); - if (tmp_err < INT_MAX) - tmp_err = av1_get_mvpred_var(x, &tmp_mv, ref_mv, &v_fn_ptr, 1); - if (tmp_err < INT_MAX - new_mv_mode_penalty) - tmp_err += new_mv_mode_penalty; - - if (tmp_err < *best_motion_err) { - *best_motion_err = tmp_err; - *best_mv = tmp_mv; - } - } - } -} - -static BLOCK_SIZE get_bsize(const AV1_COMMON *cm, int mb_row, int mb_col) { - if (mi_size_wide[BLOCK_16X16] * mb_col + mi_size_wide[BLOCK_8X8] < - cm->mi_cols) { - return mi_size_wide[BLOCK_16X16] * mb_row + mi_size_wide[BLOCK_8X8] < - cm->mi_rows - ? BLOCK_16X16 - : BLOCK_16X8; - } else { - return mi_size_wide[BLOCK_16X16] * mb_row + mi_size_wide[BLOCK_8X8] < - cm->mi_rows - ? BLOCK_8X16 - : BLOCK_8X8; - } -} - -static int find_fp_qindex(aom_bit_depth_t bit_depth) { - int i; - - for (i = 0; i < QINDEX_RANGE; ++i) - if (av1_convert_qindex_to_q(i, bit_depth) >= FIRST_PASS_Q) break; - - if (i == QINDEX_RANGE) i--; - - return i; -} - -static void set_first_pass_params(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - if (!cpi->refresh_alt_ref_frame && - (cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY))) { - cm->frame_type = KEY_FRAME; - } else { - cm->frame_type = INTER_FRAME; - } - // Do not use periodic key frames. 
- cpi->rc.frames_to_key = INT_MAX; -} - -static double raw_motion_error_stdev(int *raw_motion_err_list, - int raw_motion_err_counts) { - int64_t sum_raw_err = 0; - double raw_err_avg = 0; - double raw_err_stdev = 0; - if (raw_motion_err_counts == 0) return 0; - - int i; - for (i = 0; i < raw_motion_err_counts; i++) { - sum_raw_err += raw_motion_err_list[i]; - } - raw_err_avg = (double)sum_raw_err / raw_motion_err_counts; - for (i = 0; i < raw_motion_err_counts; i++) { - raw_err_stdev += (raw_motion_err_list[i] - raw_err_avg) * - (raw_motion_err_list[i] - raw_err_avg); - } - // Calculate the standard deviation for the motion error of all the inter - // blocks of the 0,0 motion using the last source - // frame as the reference. - raw_err_stdev = sqrt(raw_err_stdev / raw_motion_err_counts); - return raw_err_stdev; -} - -#define UL_INTRA_THRESH 50 -#define INVALID_ROW -1 -void av1_first_pass(AV1_COMP *cpi, const struct lookahead_entry *source) { - int mb_row, mb_col; - MACROBLOCK *const x = &cpi->td.mb; - AV1_COMMON *const cm = &cpi->common; - const SequenceHeader *const seq_params = &cm->seq_params; - const int num_planes = av1_num_planes(cm); - MACROBLOCKD *const xd = &x->e_mbd; - TileInfo tile; - struct macroblock_plane *const p = x->plane; - struct macroblockd_plane *const pd = xd->plane; - const PICK_MODE_CONTEXT *ctx = - &cpi->td.pc_root[MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2]->none; - int i; - - int recon_yoffset, recon_uvoffset; - int64_t intra_error = 0; - int64_t frame_avg_wavelet_energy = 0; - int64_t coded_error = 0; - int64_t sr_coded_error = 0; - - int sum_mvr = 0, sum_mvc = 0; - int sum_mvr_abs = 0, sum_mvc_abs = 0; - int64_t sum_mvrs = 0, sum_mvcs = 0; - int mvcount = 0; - int intercount = 0; - int second_ref_count = 0; - const int intrapenalty = INTRA_MODE_PENALTY; - double neutral_count; - int intra_skip_count = 0; - int image_data_start_row = INVALID_ROW; - int new_mv_count = 0; - int sum_in_vectors = 0; - MV lastmv = kZeroMv; - TWO_PASS *twopass = 
&cpi->twopass; - int recon_y_stride, recon_uv_stride, uv_mb_height; - - YV12_BUFFER_CONFIG *const lst_yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); - YV12_BUFFER_CONFIG *gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME); - YV12_BUFFER_CONFIG *const new_yv12 = get_frame_new_buffer(cm); - const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12; - double intra_factor; - double brightness_factor; - BufferPool *const pool = cm->buffer_pool; - const int qindex = find_fp_qindex(seq_params->bit_depth); - const int mb_scale = mi_size_wide[BLOCK_16X16]; - - int *raw_motion_err_list; - int raw_motion_err_counts = 0; - CHECK_MEM_ERROR( - cm, raw_motion_err_list, - aom_calloc(cm->mb_rows * cm->mb_cols, sizeof(*raw_motion_err_list))); - // First pass code requires valid last and new frame buffers. - assert(new_yv12 != NULL); - assert(frame_is_intra_only(cm) || (lst_yv12 != NULL)); - -#if CONFIG_FP_MB_STATS - if (cpi->use_fp_mb_stats) { - av1_zero_array(cpi->twopass.frame_mb_stats_buf, cpi->initial_mbs); - } -#endif - - aom_clear_system_state(); - - xd->mi = cm->mi_grid_visible; - xd->mi[0] = cm->mi; - x->e_mbd.mi[0]->sb_type = BLOCK_16X16; - - intra_factor = 0.0; - brightness_factor = 0.0; - neutral_count = 0.0; - - set_first_pass_params(cpi); - av1_set_quantizer(cm, qindex); - - av1_setup_block_planes(&x->e_mbd, seq_params->subsampling_x, - seq_params->subsampling_y, num_planes); - - av1_setup_src_planes(x, cpi->source, 0, 0, num_planes); - av1_setup_dst_planes(xd->plane, seq_params->sb_size, new_yv12, 0, 0, 0, - num_planes); - - if (!frame_is_intra_only(cm)) { - av1_setup_pre_planes(xd, 0, first_ref_buf, 0, 0, NULL, num_planes); - } - - xd->mi = cm->mi_grid_visible; - xd->mi[0] = cm->mi; - - // Don't store luma on the fist pass since chroma is not computed - xd->cfl.store_y = 0; - av1_frame_init_quantizer(cpi); - - for (i = 0; i < num_planes; ++i) { - p[i].coeff = ctx->coeff[i]; - p[i].qcoeff = ctx->qcoeff[i]; - pd[i].dqcoeff = ctx->dqcoeff[i]; - p[i].eobs = ctx->eobs[i]; - 
p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i]; - } - - av1_init_mv_probs(cm); - av1_init_lv_map(cm); - av1_initialize_rd_consts(cpi); - - // Tiling is ignored in the first pass. - av1_tile_init(&tile, cm, 0, 0); - - recon_y_stride = new_yv12->y_stride; - recon_uv_stride = new_yv12->uv_stride; - uv_mb_height = 16 >> (new_yv12->y_height > new_yv12->uv_height); - - for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) { - MV best_ref_mv = kZeroMv; - - // Reset above block coeffs. - xd->up_available = (mb_row != 0); - recon_yoffset = (mb_row * recon_y_stride * 16); - recon_uvoffset = (mb_row * recon_uv_stride * uv_mb_height); - - // Set up limit values for motion vectors to prevent them extending - // outside the UMV borders. - x->mv_limits.row_min = -((mb_row * 16) + BORDER_MV_PIXELS_B16); - x->mv_limits.row_max = - ((cm->mb_rows - 1 - mb_row) * 16) + BORDER_MV_PIXELS_B16; - - for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) { - int this_error; - const int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row); - const BLOCK_SIZE bsize = get_bsize(cm, mb_row, mb_col); - double log_intra; - int level_sample; - -#if CONFIG_FP_MB_STATS - const int mb_index = mb_row * cm->mb_cols + mb_col; -#endif - - aom_clear_system_state(); - - const int idx_str = xd->mi_stride * mb_row * mb_scale + mb_col * mb_scale; - xd->mi = cm->mi_grid_visible + idx_str; - xd->mi[0] = cm->mi + idx_str; - xd->plane[0].dst.buf = new_yv12->y_buffer + recon_yoffset; - xd->plane[1].dst.buf = new_yv12->u_buffer + recon_uvoffset; - xd->plane[2].dst.buf = new_yv12->v_buffer + recon_uvoffset; - xd->left_available = (mb_col != 0); - xd->mi[0]->sb_type = bsize; - xd->mi[0]->ref_frame[0] = INTRA_FRAME; - set_mi_row_col(xd, &tile, mb_row * mb_scale, mi_size_high[bsize], - mb_col * mb_scale, mi_size_wide[bsize], cm->mi_rows, - cm->mi_cols); - - set_plane_n4(xd, mi_size_wide[bsize], mi_size_high[bsize], num_planes); - - // Do intra 16x16 prediction. 
- xd->mi[0]->segment_id = 0; - xd->lossless[xd->mi[0]->segment_id] = (qindex == 0); - xd->mi[0]->mode = DC_PRED; - xd->mi[0]->tx_size = - use_dc_pred ? (bsize >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4; - av1_encode_intra_block_plane(cpi, x, bsize, 0, 0, mb_row * 2, mb_col * 2); - this_error = aom_get_mb_ss(x->plane[0].src_diff); - - // Keep a record of blocks that have almost no intra error residual - // (i.e. are in effect completely flat and untextured in the intra - // domain). In natural videos this is uncommon, but it is much more - // common in animations, graphics and screen content, so may be used - // as a signal to detect these types of content. - if (this_error < UL_INTRA_THRESH) { - ++intra_skip_count; - } else if ((mb_col > 0) && (image_data_start_row == INVALID_ROW)) { - image_data_start_row = mb_row; - } - - if (seq_params->use_highbitdepth) { - switch (seq_params->bit_depth) { - case AOM_BITS_8: break; - case AOM_BITS_10: this_error >>= 4; break; - case AOM_BITS_12: this_error >>= 8; break; - default: - assert(0 && - "seq_params->bit_depth should be AOM_BITS_8, " - "AOM_BITS_10 or AOM_BITS_12"); - return; - } - } - - aom_clear_system_state(); - log_intra = log(this_error + 1.0); - if (log_intra < 10.0) - intra_factor += 1.0 + ((10.0 - log_intra) * 0.05); - else - intra_factor += 1.0; - - if (seq_params->use_highbitdepth) - level_sample = CONVERT_TO_SHORTPTR(x->plane[0].src.buf)[0]; - else - level_sample = x->plane[0].src.buf[0]; - if ((level_sample < DARK_THRESH) && (log_intra < 9.0)) - brightness_factor += 1.0 + (0.01 * (DARK_THRESH - level_sample)); - else - brightness_factor += 1.0; - - // Intrapenalty below deals with situations where the intra and inter - // error scores are very low (e.g. a plain black frame). - // We do not have special cases in first pass for 0,0 and nearest etc so - // all inter modes carry an overhead cost estimate for the mv. 
- // When the error score is very low this causes us to pick all or lots of - // INTRA modes and throw lots of key frames. - // This penalty adds a cost matching that of a 0,0 mv to the intra case. - this_error += intrapenalty; - - // Accumulate the intra error. - intra_error += (int64_t)this_error; - - int stride = x->plane[0].src.stride; - uint8_t *buf = x->plane[0].src.buf; - for (int r8 = 0; r8 < 2; ++r8) - for (int c8 = 0; c8 < 2; ++c8) { - int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH; - frame_avg_wavelet_energy += av1_haar_ac_sad_8x8_uint8_input( - buf + c8 * 8 + r8 * 8 * stride, stride, hbd); - } - -#if CONFIG_FP_MB_STATS - if (cpi->use_fp_mb_stats) { - // initialization - cpi->twopass.frame_mb_stats_buf[mb_index] = 0; - } -#endif - - // Set up limit values for motion vectors to prevent them extending - // outside the UMV borders. - x->mv_limits.col_min = -((mb_col * 16) + BORDER_MV_PIXELS_B16); - x->mv_limits.col_max = - ((cm->mb_cols - 1 - mb_col) * 16) + BORDER_MV_PIXELS_B16; - - if (!frame_is_intra_only(cm)) { // Do a motion search - int tmp_err, motion_error, raw_motion_error; - // Assume 0,0 motion with no mv overhead. - MV mv = kZeroMv, tmp_mv = kZeroMv; - struct buf_2d unscaled_last_source_buf_2d; - - xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset; - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - motion_error = highbd_get_prediction_error( - bsize, &x->plane[0].src, &xd->plane[0].pre[0], xd->bd); - } else { - motion_error = get_prediction_error(bsize, &x->plane[0].src, - &xd->plane[0].pre[0]); - } - - // Compute the motion error of the 0,0 motion using the last source - // frame as the reference. Skip the further motion search on - // reconstructed frame if this error is small. 
- unscaled_last_source_buf_2d.buf = - cpi->unscaled_last_source->y_buffer + recon_yoffset; - unscaled_last_source_buf_2d.stride = - cpi->unscaled_last_source->y_stride; - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - raw_motion_error = highbd_get_prediction_error( - bsize, &x->plane[0].src, &unscaled_last_source_buf_2d, xd->bd); - } else { - raw_motion_error = get_prediction_error(bsize, &x->plane[0].src, - &unscaled_last_source_buf_2d); - } - - // TODO(pengchong): Replace the hard-coded threshold - if (raw_motion_error > 25) { - // Test last reference frame using the previous best mv as the - // starting point (best reference) for the search. - first_pass_motion_search(cpi, x, &best_ref_mv, &mv, &motion_error); - - // If the current best reference mv is not centered on 0,0 then do a - // 0,0 based search as well. - if (!is_zero_mv(&best_ref_mv)) { - tmp_err = INT_MAX; - first_pass_motion_search(cpi, x, &kZeroMv, &tmp_mv, &tmp_err); - - if (tmp_err < motion_error) { - motion_error = tmp_err; - mv = tmp_mv; - } - } - - // Search in an older reference frame. - if ((cm->current_video_frame > 1) && gld_yv12 != NULL) { - // Assume 0,0 motion with no mv overhead. - int gf_motion_error; - - xd->plane[0].pre[0].buf = gld_yv12->y_buffer + recon_yoffset; - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - gf_motion_error = highbd_get_prediction_error( - bsize, &x->plane[0].src, &xd->plane[0].pre[0], xd->bd); - } else { - gf_motion_error = get_prediction_error(bsize, &x->plane[0].src, - &xd->plane[0].pre[0]); - } - - first_pass_motion_search(cpi, x, &kZeroMv, &tmp_mv, - &gf_motion_error); - - if (gf_motion_error < motion_error && gf_motion_error < this_error) - ++second_ref_count; - - // Reset to last frame as reference buffer. 
- xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset; - xd->plane[1].pre[0].buf = first_ref_buf->u_buffer + recon_uvoffset; - xd->plane[2].pre[0].buf = first_ref_buf->v_buffer + recon_uvoffset; - - // In accumulating a score for the older reference frame take the - // best of the motion predicted score and the intra coded error - // (just as will be done for) accumulation of "coded_error" for - // the last frame. - if (gf_motion_error < this_error) - sr_coded_error += gf_motion_error; - else - sr_coded_error += this_error; - } else { - sr_coded_error += motion_error; - } - } else { - sr_coded_error += motion_error; - } - - // Start by assuming that intra mode is best. - best_ref_mv.row = 0; - best_ref_mv.col = 0; - -#if CONFIG_FP_MB_STATS - if (cpi->use_fp_mb_stats) { - // intra predication statistics - cpi->twopass.frame_mb_stats_buf[mb_index] = 0; - cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_DCINTRA_MASK; - cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_MOTION_ZERO_MASK; - if (this_error > FPMB_ERROR_LARGE_TH) { - cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_ERROR_LARGE_MASK; - } else if (this_error < FPMB_ERROR_SMALL_TH) { - cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_ERROR_SMALL_MASK; - } - } -#endif - - if (motion_error <= this_error) { - aom_clear_system_state(); - - // Keep a count of cases where the inter and intra were very close - // and very low. This helps with scene cut detection for example in - // cropped clips with black bars at the sides or top and bottom. - if (((this_error - intrapenalty) * 9 <= motion_error * 10) && - (this_error < (2 * intrapenalty))) { - neutral_count += 1.0; - // Also track cases where the intra is not much worse than the inter - // and use this in limiting the GF/arf group length. 
- } else if ((this_error > NCOUNT_INTRA_THRESH) && - (this_error < (NCOUNT_INTRA_FACTOR * motion_error))) { - neutral_count += - (double)motion_error / DOUBLE_DIVIDE_CHECK((double)this_error); - } - - mv.row *= 8; - mv.col *= 8; - this_error = motion_error; - xd->mi[0]->mode = NEWMV; - xd->mi[0]->mv[0].as_mv = mv; - xd->mi[0]->tx_size = TX_4X4; - xd->mi[0]->ref_frame[0] = LAST_FRAME; - xd->mi[0]->ref_frame[1] = NONE_FRAME; - av1_build_inter_predictors_sby(cm, xd, mb_row * mb_scale, - mb_col * mb_scale, NULL, bsize); - av1_encode_sby_pass1(cm, x, bsize); - sum_mvr += mv.row; - sum_mvr_abs += abs(mv.row); - sum_mvc += mv.col; - sum_mvc_abs += abs(mv.col); - sum_mvrs += mv.row * mv.row; - sum_mvcs += mv.col * mv.col; - ++intercount; - - best_ref_mv = mv; - -#if CONFIG_FP_MB_STATS - if (cpi->use_fp_mb_stats) { - // inter predication statistics - cpi->twopass.frame_mb_stats_buf[mb_index] = 0; - cpi->twopass.frame_mb_stats_buf[mb_index] &= ~FPMB_DCINTRA_MASK; - cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_MOTION_ZERO_MASK; - if (this_error > FPMB_ERROR_LARGE_TH) { - cpi->twopass.frame_mb_stats_buf[mb_index] |= - FPMB_ERROR_LARGE_MASK; - } else if (this_error < FPMB_ERROR_SMALL_TH) { - cpi->twopass.frame_mb_stats_buf[mb_index] |= - FPMB_ERROR_SMALL_MASK; - } - } -#endif - - if (!is_zero_mv(&mv)) { - ++mvcount; - -#if CONFIG_FP_MB_STATS - if (cpi->use_fp_mb_stats) { - cpi->twopass.frame_mb_stats_buf[mb_index] &= - ~FPMB_MOTION_ZERO_MASK; - // check estimated motion direction - if (mv.col > 0 && mv.col >= abs(mv.row)) { - // right direction - cpi->twopass.frame_mb_stats_buf[mb_index] |= - FPMB_MOTION_RIGHT_MASK; - } else if (mv.row < 0 && abs(mv.row) >= abs(mv.col)) { - // up direction - cpi->twopass.frame_mb_stats_buf[mb_index] |= - FPMB_MOTION_UP_MASK; - } else if (mv.col < 0 && abs(mv.col) >= abs(mv.row)) { - // left direction - cpi->twopass.frame_mb_stats_buf[mb_index] |= - FPMB_MOTION_LEFT_MASK; - } else { - // down direction - 
cpi->twopass.frame_mb_stats_buf[mb_index] |= - FPMB_MOTION_DOWN_MASK; - } - } -#endif - - // Non-zero vector, was it different from the last non zero vector? - if (!is_equal_mv(&mv, &lastmv)) ++new_mv_count; - lastmv = mv; - - // Does the row vector point inwards or outwards? - if (mb_row < cm->mb_rows / 2) { - if (mv.row > 0) - --sum_in_vectors; - else if (mv.row < 0) - ++sum_in_vectors; - } else if (mb_row > cm->mb_rows / 2) { - if (mv.row > 0) - ++sum_in_vectors; - else if (mv.row < 0) - --sum_in_vectors; - } - - // Does the col vector point inwards or outwards? - if (mb_col < cm->mb_cols / 2) { - if (mv.col > 0) - --sum_in_vectors; - else if (mv.col < 0) - ++sum_in_vectors; - } else if (mb_col > cm->mb_cols / 2) { - if (mv.col > 0) - ++sum_in_vectors; - else if (mv.col < 0) - --sum_in_vectors; - } - } - } - raw_motion_err_list[raw_motion_err_counts++] = raw_motion_error; - } else { - sr_coded_error += (int64_t)this_error; - } - coded_error += (int64_t)this_error; - - // Adjust to the next column of MBs. - x->plane[0].src.buf += 16; - x->plane[1].src.buf += uv_mb_height; - x->plane[2].src.buf += uv_mb_height; - - recon_yoffset += 16; - recon_uvoffset += uv_mb_height; - } - // Adjust to the next row of MBs. - x->plane[0].src.buf += 16 * x->plane[0].src.stride - 16 * cm->mb_cols; - x->plane[1].src.buf += - uv_mb_height * x->plane[1].src.stride - uv_mb_height * cm->mb_cols; - x->plane[2].src.buf += - uv_mb_height * x->plane[1].src.stride - uv_mb_height * cm->mb_cols; - - aom_clear_system_state(); - } - const double raw_err_stdev = - raw_motion_error_stdev(raw_motion_err_list, raw_motion_err_counts); - aom_free(raw_motion_err_list); - - // Clamp the image start to rows/2. This number of rows is discarded top - // and bottom as dead data so rows / 2 means the frame is blank. 
- if ((image_data_start_row > cm->mb_rows / 2) || - (image_data_start_row == INVALID_ROW)) { - image_data_start_row = cm->mb_rows / 2; - } - // Exclude any image dead zone - if (image_data_start_row > 0) { - intra_skip_count = - AOMMAX(0, intra_skip_count - (image_data_start_row * cm->mb_cols * 2)); - } - - { - FIRSTPASS_STATS fps; - // The minimum error here insures some bit allocation to frames even - // in static regions. The allocation per MB declines for larger formats - // where the typical "real" energy per MB also falls. - // Initial estimate here uses sqrt(mbs) to define the min_err, where the - // number of mbs is proportional to the image area. - const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE) - ? cpi->initial_mbs - : cpi->common.MBs; - const double min_err = 200 * sqrt(num_mbs); - - intra_factor = intra_factor / (double)num_mbs; - brightness_factor = brightness_factor / (double)num_mbs; - fps.weight = intra_factor * brightness_factor; - - fps.frame = cm->current_video_frame; - fps.coded_error = (double)(coded_error >> 8) + min_err; - fps.sr_coded_error = (double)(sr_coded_error >> 8) + min_err; - fps.intra_error = (double)(intra_error >> 8) + min_err; - fps.frame_avg_wavelet_energy = (double)frame_avg_wavelet_energy; - fps.count = 1.0; - fps.pcnt_inter = (double)intercount / num_mbs; - fps.pcnt_second_ref = (double)second_ref_count / num_mbs; - fps.pcnt_neutral = (double)neutral_count / num_mbs; - fps.intra_skip_pct = (double)intra_skip_count / num_mbs; - fps.inactive_zone_rows = (double)image_data_start_row; - fps.inactive_zone_cols = (double)0; // TODO(paulwilkins): fix - fps.raw_error_stdev = raw_err_stdev; - - if (mvcount > 0) { - fps.MVr = (double)sum_mvr / mvcount; - fps.mvr_abs = (double)sum_mvr_abs / mvcount; - fps.MVc = (double)sum_mvc / mvcount; - fps.mvc_abs = (double)sum_mvc_abs / mvcount; - fps.MVrv = - ((double)sum_mvrs - ((double)sum_mvr * sum_mvr / mvcount)) / mvcount; - fps.MVcv = - ((double)sum_mvcs - ((double)sum_mvc * 
sum_mvc / mvcount)) / mvcount; - fps.mv_in_out_count = (double)sum_in_vectors / (mvcount * 2); - fps.new_mv_count = new_mv_count; - fps.pcnt_motion = (double)mvcount / num_mbs; - } else { - fps.MVr = 0.0; - fps.mvr_abs = 0.0; - fps.MVc = 0.0; - fps.mvc_abs = 0.0; - fps.MVrv = 0.0; - fps.MVcv = 0.0; - fps.mv_in_out_count = 0.0; - fps.new_mv_count = 0.0; - fps.pcnt_motion = 0.0; - } - - // TODO(paulwilkins): Handle the case when duration is set to 0, or - // something less than the full time between subsequent values of - // cpi->source_time_stamp. - fps.duration = (double)(source->ts_end - source->ts_start); - - // Don't want to do output stats with a stack variable! - twopass->this_frame_stats = fps; - output_stats(&twopass->this_frame_stats, cpi->output_pkt_list); - accumulate_stats(&twopass->total_stats, &fps); - -#if CONFIG_FP_MB_STATS - if (cpi->use_fp_mb_stats) { - output_fpmb_stats(twopass->frame_mb_stats_buf, cpi->initial_mbs, - cpi->output_pkt_list); - } -#endif - } - - // Copy the previous Last Frame back into gf and and arf buffers if - // the prediction is good enough... but also don't allow it to lag too far. - if ((twopass->sr_update_lag > 3) || - ((cm->current_video_frame > 0) && - (twopass->this_frame_stats.pcnt_inter > 0.20) && - ((twopass->this_frame_stats.intra_error / - DOUBLE_DIVIDE_CHECK(twopass->this_frame_stats.coded_error)) > 2.0))) { - if (gld_yv12 != NULL) { - ref_cnt_fb(pool->frame_bufs, - &cm->ref_frame_map[cpi->ref_fb_idx[GOLDEN_FRAME - 1]], - cm->ref_frame_map[cpi->ref_fb_idx[LAST_FRAME - 1]]); - } - twopass->sr_update_lag = 1; - } else { - ++twopass->sr_update_lag; - } - - aom_extend_frame_borders(new_yv12, num_planes); - - // The frame we just compressed now becomes the last frame. - ref_cnt_fb(pool->frame_bufs, - &cm->ref_frame_map[cpi->ref_fb_idx[LAST_FRAME - 1]], - cm->new_fb_idx); - - // Special case for the first frame. Copy into the GF buffer as a second - // reference. 
- if (cm->current_video_frame == 0 && - cpi->ref_fb_idx[GOLDEN_FRAME - 1] != INVALID_IDX) { - ref_cnt_fb(pool->frame_bufs, - &cm->ref_frame_map[cpi->ref_fb_idx[GOLDEN_FRAME - 1]], - cm->ref_frame_map[cpi->ref_fb_idx[LAST_FRAME - 1]]); - } - - // Use this to see what the first pass reconstruction looks like. - if (0) { - char filename[512]; - FILE *recon_file; - snprintf(filename, sizeof(filename), "enc%04d.yuv", - (int)cm->current_video_frame); - - if (cm->current_video_frame == 0) - recon_file = fopen(filename, "wb"); - else - recon_file = fopen(filename, "ab"); - - (void)fwrite(lst_yv12->buffer_alloc, lst_yv12->frame_size, 1, recon_file); - fclose(recon_file); - } - - ++cm->current_video_frame; -} - -static double calc_correction_factor(double err_per_mb, double err_divisor, - double pt_low, double pt_high, int q, - aom_bit_depth_t bit_depth) { - const double error_term = err_per_mb / err_divisor; - - // Adjustment based on actual quantizer to power term. - const double power_term = - AOMMIN(av1_convert_qindex_to_q(q, bit_depth) * 0.01 + pt_low, pt_high); - - // Calculate correction factor. - if (power_term < 1.0) assert(error_term >= 0.0); - - return fclamp(pow(error_term, power_term), 0.05, 5.0); -} - -#define ERR_DIVISOR 100.0 -static int get_twopass_worst_quality(const AV1_COMP *cpi, - const double section_err, - double inactive_zone, - int section_target_bandwidth, - double group_weight_factor) { - const RATE_CONTROL *const rc = &cpi->rc; - const AV1EncoderConfig *const oxcf = &cpi->oxcf; - - inactive_zone = fclamp(inactive_zone, 0.0, 1.0); - - if (section_target_bandwidth <= 0) { - return rc->worst_quality; // Highest value allowed - } else { - const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE) - ? 
cpi->initial_mbs - : cpi->common.MBs; - const int active_mbs = AOMMAX(1, num_mbs - (int)(num_mbs * inactive_zone)); - const double av_err_per_mb = section_err / active_mbs; - const double speed_term = 1.0; - double ediv_size_correction; - const int target_norm_bits_per_mb = - (int)((uint64_t)section_target_bandwidth << BPER_MB_NORMBITS) / - active_mbs; - int q; - - // Larger image formats are expected to be a little harder to code - // relatively given the same prediction error score. This in part at - // least relates to the increased size and hence coding overheads of - // motion vectors. Some account of this is made through adjustment of - // the error divisor. - ediv_size_correction = - AOMMAX(0.2, AOMMIN(5.0, get_linear_size_factor(cpi))); - if (ediv_size_correction < 1.0) - ediv_size_correction = -(1.0 / ediv_size_correction); - ediv_size_correction *= 4.0; - - // Try and pick a max Q that will be high enough to encode the - // content at the given rate. - for (q = rc->best_quality; q < rc->worst_quality; ++q) { - const double factor = calc_correction_factor( - av_err_per_mb, ERR_DIVISOR - ediv_size_correction, FACTOR_PT_LOW, - FACTOR_PT_HIGH, q, cpi->common.seq_params.bit_depth); - const int bits_per_mb = av1_rc_bits_per_mb( - INTER_FRAME, q, factor * speed_term * group_weight_factor, - cpi->common.seq_params.bit_depth); - if (bits_per_mb <= target_norm_bits_per_mb) break; - } - - // Restriction on active max q for constrained quality mode. 
- if (cpi->oxcf.rc_mode == AOM_CQ) q = AOMMAX(q, oxcf->cq_level); - return q; - } -} - -static void setup_rf_level_maxq(AV1_COMP *cpi) { - int i; - RATE_CONTROL *const rc = &cpi->rc; - for (i = INTER_NORMAL; i < RATE_FACTOR_LEVELS; ++i) { - int qdelta = av1_frame_type_qdelta(cpi, i, rc->worst_quality); - rc->rf_level_maxq[i] = AOMMAX(rc->worst_quality + qdelta, rc->best_quality); - } -} - -void av1_init_second_pass(AV1_COMP *cpi) { - const AV1EncoderConfig *const oxcf = &cpi->oxcf; - TWO_PASS *const twopass = &cpi->twopass; - double frame_rate; - FIRSTPASS_STATS *stats; - - zero_stats(&twopass->total_stats); - zero_stats(&twopass->total_left_stats); - - if (!twopass->stats_in_end) return; - - stats = &twopass->total_stats; - - *stats = *twopass->stats_in_end; - twopass->total_left_stats = *stats; - - frame_rate = 10000000.0 * stats->count / stats->duration; - // Each frame can have a different duration, as the frame rate in the source - // isn't guaranteed to be constant. The frame rate prior to the first frame - // encoded in the second pass is a guess. However, the sum duration is not. - // It is calculated based on the actual durations of all frames from the - // first pass. - av1_new_framerate(cpi, frame_rate); - twopass->bits_left = - (int64_t)(stats->duration * oxcf->target_bandwidth / 10000000.0); - - // This variable monitors how far behind the second ref update is lagging. - twopass->sr_update_lag = 1; - - // Scan the first pass file and calculate a modified total error based upon - // the bias/power function used to allocate bits. 
- { - const double avg_error = - stats->coded_error / DOUBLE_DIVIDE_CHECK(stats->count); - const FIRSTPASS_STATS *s = twopass->stats_in; - double modified_error_total = 0.0; - twopass->modified_error_min = - (avg_error * oxcf->two_pass_vbrmin_section) / 100; - twopass->modified_error_max = - (avg_error * oxcf->two_pass_vbrmax_section) / 100; - while (s < twopass->stats_in_end) { - modified_error_total += calculate_modified_err(cpi, twopass, oxcf, s); - ++s; - } - twopass->modified_error_left = modified_error_total; - } - - // Reset the vbr bits off target counters - cpi->rc.vbr_bits_off_target = 0; - cpi->rc.vbr_bits_off_target_fast = 0; - - cpi->rc.rate_error_estimate = 0; - - // Static sequence monitor variables. - twopass->kf_zeromotion_pct = 100; - twopass->last_kfgroup_zeromotion_pct = 100; - - if (oxcf->resize_mode != RESIZE_NONE) { - setup_rf_level_maxq(cpi); - } -} - -#define SR_DIFF_PART 0.0015 -#define MOTION_AMP_PART 0.003 -#define INTRA_PART 0.005 -#define DEFAULT_DECAY_LIMIT 0.75 -#define LOW_SR_DIFF_TRHESH 0.1 -#define SR_DIFF_MAX 128.0 - -static double get_sr_decay_rate(const AV1_COMP *cpi, - const FIRSTPASS_STATS *frame) { - const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE) ? 
cpi->initial_mbs - : cpi->common.MBs; - double sr_diff = (frame->sr_coded_error - frame->coded_error) / num_mbs; - double sr_decay = 1.0; - double modified_pct_inter; - double modified_pcnt_intra; - const double motion_amplitude_factor = - frame->pcnt_motion * ((frame->mvc_abs + frame->mvr_abs) / 2); - - modified_pct_inter = frame->pcnt_inter; - if ((frame->intra_error / DOUBLE_DIVIDE_CHECK(frame->coded_error)) < - (double)NCOUNT_FRAME_II_THRESH) { - modified_pct_inter = frame->pcnt_inter - frame->pcnt_neutral; - } - modified_pcnt_intra = 100 * (1.0 - modified_pct_inter); - - if ((sr_diff > LOW_SR_DIFF_TRHESH)) { - sr_diff = AOMMIN(sr_diff, SR_DIFF_MAX); - sr_decay = 1.0 - (SR_DIFF_PART * sr_diff) - - (MOTION_AMP_PART * motion_amplitude_factor) - - (INTRA_PART * modified_pcnt_intra); - } - return AOMMAX(sr_decay, AOMMIN(DEFAULT_DECAY_LIMIT, modified_pct_inter)); -} - -// This function gives an estimate of how badly we believe the prediction -// quality is decaying from frame to frame. -static double get_zero_motion_factor(const AV1_COMP *cpi, - const FIRSTPASS_STATS *frame) { - const double zero_motion_pct = frame->pcnt_inter - frame->pcnt_motion; - double sr_decay = get_sr_decay_rate(cpi, frame); - return AOMMIN(sr_decay, zero_motion_pct); -} - -#define ZM_POWER_FACTOR 0.75 - -static double get_prediction_decay_rate(const AV1_COMP *cpi, - const FIRSTPASS_STATS *next_frame) { - const double sr_decay_rate = get_sr_decay_rate(cpi, next_frame); - const double zero_motion_factor = - (0.95 * pow((next_frame->pcnt_inter - next_frame->pcnt_motion), - ZM_POWER_FACTOR)); - - return AOMMAX(zero_motion_factor, - (sr_decay_rate + ((1.0 - sr_decay_rate) * zero_motion_factor))); -} - -// Function to test for a condition where a complex transition is followed -// by a static section. For example in slide shows where there is a fade -// between slides. This is to help with more optimal kf and gf positioning. 
-static int detect_transition_to_still(AV1_COMP *cpi, int frame_interval, - int still_interval, - double loop_decay_rate, - double last_decay_rate) { - TWO_PASS *const twopass = &cpi->twopass; - RATE_CONTROL *const rc = &cpi->rc; - - // Break clause to detect very still sections after motion - // For example a static image after a fade or other transition - // instead of a clean scene cut. - if (frame_interval > rc->min_gf_interval && loop_decay_rate >= 0.999 && - last_decay_rate < 0.9) { - int j; - - // Look ahead a few frames to see if static condition persists... - for (j = 0; j < still_interval; ++j) { - const FIRSTPASS_STATS *stats = &twopass->stats_in[j]; - if (stats >= twopass->stats_in_end) break; - - if (stats->pcnt_inter - stats->pcnt_motion < 0.999) break; - } - - // Only if it does do we signal a transition to still. - return j == still_interval; - } - - return 0; -} - -// This function detects a flash through the high relative pcnt_second_ref -// score in the frame following a flash frame. The offset passed in should -// reflect this. -static int detect_flash(const TWO_PASS *twopass, int offset) { - const FIRSTPASS_STATS *const next_frame = read_frame_stats(twopass, offset); - - // What we are looking for here is a situation where there is a - // brief break in prediction (such as a flash) but subsequent frames - // are reasonably well predicted by an earlier (pre flash) frame. - // The recovery after a flash is indicated by a high pcnt_second_ref - // compared to pcnt_inter. - return next_frame != NULL && - next_frame->pcnt_second_ref > next_frame->pcnt_inter && - next_frame->pcnt_second_ref >= 0.5; -} - -// Update the motion related elements to the GF arf boost calculation. 
-static void accumulate_frame_motion_stats(const FIRSTPASS_STATS *stats, - double *mv_in_out, - double *mv_in_out_accumulator, - double *abs_mv_in_out_accumulator, - double *mv_ratio_accumulator) { - const double pct = stats->pcnt_motion; - - // Accumulate Motion In/Out of frame stats. - *mv_in_out = stats->mv_in_out_count * pct; - *mv_in_out_accumulator += *mv_in_out; - *abs_mv_in_out_accumulator += fabs(*mv_in_out); - - // Accumulate a measure of how uniform (or conversely how random) the motion - // field is (a ratio of abs(mv) / mv). - if (pct > 0.05) { - const double mvr_ratio = - fabs(stats->mvr_abs) / DOUBLE_DIVIDE_CHECK(fabs(stats->MVr)); - const double mvc_ratio = - fabs(stats->mvc_abs) / DOUBLE_DIVIDE_CHECK(fabs(stats->MVc)); - - *mv_ratio_accumulator += - pct * (mvr_ratio < stats->mvr_abs ? mvr_ratio : stats->mvr_abs); - *mv_ratio_accumulator += - pct * (mvc_ratio < stats->mvc_abs ? mvc_ratio : stats->mvc_abs); - } -} - -#define BASELINE_ERR_PER_MB 1000.0 -static double calc_frame_boost(AV1_COMP *cpi, const FIRSTPASS_STATS *this_frame, - double this_frame_mv_in_out, double max_boost) { - double frame_boost; - const double lq = av1_convert_qindex_to_q( - cpi->rc.avg_frame_qindex[INTER_FRAME], cpi->common.seq_params.bit_depth); - const double boost_q_correction = AOMMIN((0.5 + (lq * 0.015)), 1.5); - int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE) ? cpi->initial_mbs - : cpi->common.MBs; - - // Correct for any inactive region in the image - num_mbs = (int)AOMMAX(1, num_mbs * calculate_active_area(cpi, this_frame)); - - // Underlying boost factor is based on inter error ratio. - frame_boost = (BASELINE_ERR_PER_MB * num_mbs) / - DOUBLE_DIVIDE_CHECK(this_frame->coded_error); - frame_boost = frame_boost * BOOST_FACTOR * boost_q_correction; - - // Increase boost for frames where new data coming into frame (e.g. zoom out). - // Slightly reduce boost if there is a net balance of motion out of the frame - // (zoom in). 
The range for this_frame_mv_in_out is -1.0 to +1.0. - if (this_frame_mv_in_out > 0.0) - frame_boost += frame_boost * (this_frame_mv_in_out * 2.0); - // In the extreme case the boost is halved. - else - frame_boost += frame_boost * (this_frame_mv_in_out / 2.0); - - return AOMMIN(frame_boost, max_boost * boost_q_correction); -} - -static int calc_arf_boost(AV1_COMP *cpi, int offset, int f_frames, int b_frames, - int *f_boost, int *b_boost) { - TWO_PASS *const twopass = &cpi->twopass; - int i; - double boost_score = 0.0; - double mv_ratio_accumulator = 0.0; - double decay_accumulator = 1.0; - double this_frame_mv_in_out = 0.0; - double mv_in_out_accumulator = 0.0; - double abs_mv_in_out_accumulator = 0.0; - int arf_boost; - int flash_detected = 0; - - // Search forward from the proposed arf/next gf position. - for (i = 0; i < f_frames; ++i) { - const FIRSTPASS_STATS *this_frame = read_frame_stats(twopass, i + offset); - if (this_frame == NULL) break; - - // Update the motion related elements to the boost calculation. - accumulate_frame_motion_stats( - this_frame, &this_frame_mv_in_out, &mv_in_out_accumulator, - &abs_mv_in_out_accumulator, &mv_ratio_accumulator); - - // We want to discount the flash frame itself and the recovery - // frame that follows as both will have poor scores. - flash_detected = detect_flash(twopass, i + offset) || - detect_flash(twopass, i + offset + 1); - - // Accumulate the effect of prediction quality decay. - if (!flash_detected) { - decay_accumulator *= get_prediction_decay_rate(cpi, this_frame); - decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR - ? MIN_DECAY_FACTOR - : decay_accumulator; - } - - boost_score += - decay_accumulator * - calc_frame_boost(cpi, this_frame, this_frame_mv_in_out, GF_MAX_BOOST); - } - - *f_boost = (int)boost_score; - - // Reset for backward looking loop. 
- boost_score = 0.0; - mv_ratio_accumulator = 0.0; - decay_accumulator = 1.0; - this_frame_mv_in_out = 0.0; - mv_in_out_accumulator = 0.0; - abs_mv_in_out_accumulator = 0.0; - - // Search backward towards last gf position. - for (i = -1; i >= -b_frames; --i) { - const FIRSTPASS_STATS *this_frame = read_frame_stats(twopass, i + offset); - if (this_frame == NULL) break; - - // Update the motion related elements to the boost calculation. - accumulate_frame_motion_stats( - this_frame, &this_frame_mv_in_out, &mv_in_out_accumulator, - &abs_mv_in_out_accumulator, &mv_ratio_accumulator); - - // We want to discount the the flash frame itself and the recovery - // frame that follows as both will have poor scores. - flash_detected = detect_flash(twopass, i + offset) || - detect_flash(twopass, i + offset + 1); - - // Cumulative effect of prediction quality decay. - if (!flash_detected) { - decay_accumulator *= get_prediction_decay_rate(cpi, this_frame); - decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR - ? MIN_DECAY_FACTOR - : decay_accumulator; - } - - boost_score += - decay_accumulator * - calc_frame_boost(cpi, this_frame, this_frame_mv_in_out, GF_MAX_BOOST); - } - *b_boost = (int)boost_score; - - arf_boost = (*f_boost + *b_boost); - if (arf_boost < ((b_frames + f_frames) * 20)) - arf_boost = ((b_frames + f_frames) * 20); - arf_boost = AOMMAX(arf_boost, MIN_ARF_GF_BOOST); - - return arf_boost; -} - -// Calculate a section intra ratio used in setting max loop filter. -static int calculate_section_intra_ratio(const FIRSTPASS_STATS *begin, - const FIRSTPASS_STATS *end, - int section_length) { - const FIRSTPASS_STATS *s = begin; - double intra_error = 0.0; - double coded_error = 0.0; - int i = 0; - - while (s < end && i < section_length) { - intra_error += s->intra_error; - coded_error += s->coded_error; - ++s; - ++i; - } - - return (int)(intra_error / DOUBLE_DIVIDE_CHECK(coded_error)); -} - -// Calculate the total bits to allocate in this GF/ARF group. 
-static int64_t calculate_total_gf_group_bits(AV1_COMP *cpi, - double gf_group_err) { - const RATE_CONTROL *const rc = &cpi->rc; - const TWO_PASS *const twopass = &cpi->twopass; - const int max_bits = frame_max_bits(rc, &cpi->oxcf); - int64_t total_group_bits; - - // Calculate the bits to be allocated to the group as a whole. - if ((twopass->kf_group_bits > 0) && (twopass->kf_group_error_left > 0)) { - total_group_bits = (int64_t)(twopass->kf_group_bits * - (gf_group_err / twopass->kf_group_error_left)); - } else { - total_group_bits = 0; - } - - // Clamp odd edge cases. - total_group_bits = (total_group_bits < 0) - ? 0 - : (total_group_bits > twopass->kf_group_bits) - ? twopass->kf_group_bits - : total_group_bits; - - // Clip based on user supplied data rate variability limit. - if (total_group_bits > (int64_t)max_bits * rc->baseline_gf_interval) - total_group_bits = (int64_t)max_bits * rc->baseline_gf_interval; - - return total_group_bits; -} - -// Calculate the number bits extra to assign to boosted frames in a group. -static int calculate_boost_bits(int frame_count, int boost, - int64_t total_group_bits) { - int allocation_chunks; - - // return 0 for invalid inputs (could arise e.g. through rounding errors) - if (!boost || (total_group_bits <= 0) || (frame_count <= 0)) return 0; - - allocation_chunks = (frame_count * 100) + boost; - - // Prevent overflow. - if (boost > 1023) { - int divisor = boost >> 10; - boost /= divisor; - allocation_chunks /= divisor; - } - - // Calculate the number of extra bits for use in the boosted frame or frames. 
- return AOMMAX((int)(((int64_t)boost * total_group_bits) / allocation_chunks), - 0); -} - -#if USE_SYMM_MULTI_LAYER -// #define CHCEK_GF_PARAMETER -#ifdef CHCEK_GF_PARAMETER -void check_frame_params(GF_GROUP *const gf_group, int gf_interval, - int frame_nums) { - static const char *update_type_strings[] = { - "KF_UPDATE", "LF_UPDATE", "GF_UPDATE", - "ARF_UPDATE", "OVERLAY_UPDATE", "BRF_UPDATE", - "LAST_BIPRED_UPDATE", "BIPRED_UPDATE", "INTNL_OVERLAY_UPDATE", - "INTNL_ARF_UPDATE" - }; - FILE *fid = fopen("GF_PARAMS.txt", "a"); - - fprintf(fid, "\n{%d}\n", gf_interval); - for (int i = 0; i <= frame_nums; ++i) { - fprintf(fid, "%s %d %d %d %d\n", - update_type_strings[gf_group->update_type[i]], - gf_group->arf_src_offset[i], gf_group->arf_pos_in_gf[i], - gf_group->arf_update_idx[i], gf_group->pyramid_level[i]); - } - - fprintf(fid, "number of nodes in each level: \n"); - for (int i = 0; i < MAX_PYRAMID_LVL; ++i) { - fprintf(fid, "lvl %d: %d ", i, gf_group->pyramid_lvl_nodes[i]); - } - fprintf(fid, "\n"); - fclose(fid); -} -#endif // CHCEK_GF_PARAMETER -static int update_type_2_rf_level(FRAME_UPDATE_TYPE update_type) { - // Derive rf_level from update_type - switch (update_type) { - case LF_UPDATE: return INTER_NORMAL; - case ARF_UPDATE: return GF_ARF_STD; - case OVERLAY_UPDATE: return INTER_NORMAL; - case BRF_UPDATE: return GF_ARF_LOW; - case LAST_BIPRED_UPDATE: return INTER_NORMAL; - case BIPRED_UPDATE: return INTER_NORMAL; - case INTNL_ARF_UPDATE: return GF_ARF_LOW; - case INTNL_OVERLAY_UPDATE: return INTER_NORMAL; - default: return INTER_NORMAL; - } -} - -static void set_multi_layer_params(GF_GROUP *const gf_group, int l, int r, - int *frame_ind, int arf_ind, int level) { - if (r - l < 4) { - while (++l < r) { - // leaf nodes, not a look-ahead frame - gf_group->update_type[*frame_ind] = LF_UPDATE; - gf_group->arf_src_offset[*frame_ind] = 0; - gf_group->arf_pos_in_gf[*frame_ind] = 0; - gf_group->arf_update_idx[*frame_ind] = arf_ind; - 
gf_group->pyramid_level[*frame_ind] = 0; - ++gf_group->pyramid_lvl_nodes[0]; - ++(*frame_ind); - } - } else { - int m = (l + r) / 2; - int arf_pos_in_gf = *frame_ind; - - gf_group->update_type[*frame_ind] = INTNL_ARF_UPDATE; - gf_group->arf_src_offset[*frame_ind] = m - l - 1; - gf_group->arf_pos_in_gf[*frame_ind] = 0; - gf_group->arf_update_idx[*frame_ind] = 1; // mark all internal ARF 1 - gf_group->pyramid_level[*frame_ind] = level; - ++gf_group->pyramid_lvl_nodes[level]; - ++(*frame_ind); - - // set parameters for frames displayed before this frame - set_multi_layer_params(gf_group, l, m, frame_ind, 1, level - 1); - - // for overlay frames, we need to record the position of its corresponding - // arf frames for bit allocation - gf_group->update_type[*frame_ind] = INTNL_OVERLAY_UPDATE; - gf_group->arf_src_offset[*frame_ind] = 0; - gf_group->arf_pos_in_gf[*frame_ind] = arf_pos_in_gf; - gf_group->arf_update_idx[*frame_ind] = 1; - gf_group->pyramid_level[*frame_ind] = 0; - ++(*frame_ind); - - // set parameters for frames displayed after this frame - set_multi_layer_params(gf_group, m, r, frame_ind, arf_ind, level - 1); - } -} - -static INLINE unsigned char get_pyramid_height(int pyramid_width) { - assert(pyramid_width <= 16 && pyramid_width >= 4 && - "invalid gf interval for pyramid structure"); - - return pyramid_width > 12 ? 4 : (pyramid_width > 6 ? 
3 : 2); -} - -static int construct_multi_layer_gf_structure(GF_GROUP *const gf_group, - const int gf_interval) { - int frame_index = 0; - gf_group->pyramid_height = get_pyramid_height(gf_interval); - - assert(gf_group->pyramid_height <= MAX_PYRAMID_LVL); - - av1_zero_array(gf_group->pyramid_lvl_nodes, MAX_PYRAMID_LVL); - - // At the beginning of each GF group it will be a key or overlay frame, - gf_group->update_type[frame_index] = OVERLAY_UPDATE; - gf_group->arf_src_offset[frame_index] = 0; - gf_group->arf_pos_in_gf[frame_index] = 0; - gf_group->arf_update_idx[frame_index] = 0; - gf_group->pyramid_level[frame_index] = 0; - ++frame_index; - - // ALT0 - gf_group->update_type[frame_index] = ARF_UPDATE; - gf_group->arf_src_offset[frame_index] = gf_interval - 1; - gf_group->arf_pos_in_gf[frame_index] = 0; - gf_group->arf_update_idx[frame_index] = 0; - gf_group->pyramid_level[frame_index] = gf_group->pyramid_height; - ++frame_index; - - // set parameters for the rest of the frames - set_multi_layer_params(gf_group, 0, gf_interval, &frame_index, 0, - gf_group->pyramid_height - 1); - return frame_index; -} - -void define_customized_gf_group_structure(AV1_COMP *cpi) { - RATE_CONTROL *const rc = &cpi->rc; - TWO_PASS *const twopass = &cpi->twopass; - GF_GROUP *const gf_group = &twopass->gf_group; - const int key_frame = cpi->common.frame_type == KEY_FRAME; - - assert(rc->baseline_gf_interval >= 4 && - rc->baseline_gf_interval <= MAX_PYRAMID_SIZE); - - const int gf_update_frames = - construct_multi_layer_gf_structure(gf_group, rc->baseline_gf_interval); - int frame_index; - - cpi->num_extra_arfs = 0; - - for (frame_index = 0; frame_index < gf_update_frames; ++frame_index) { - // Set unused variables to default values - gf_group->bidir_pred_enabled[frame_index] = 0; - gf_group->brf_src_offset[frame_index] = 0; - - // Special handle for the first frame for assigning update_type - if (frame_index == 0) { - // For key frames the frame target rate is already set and it - // is 
also the golden frame. - if (key_frame) { - gf_group->update_type[frame_index] = KF_UPDATE; - continue; - } - - if (rc->source_alt_ref_active) { - gf_group->update_type[frame_index] = OVERLAY_UPDATE; - } else { - gf_group->update_type[frame_index] = GF_UPDATE; - } - } else { - if (gf_group->update_type[frame_index] == INTNL_ARF_UPDATE) - ++cpi->num_extra_arfs; - } - - // Assign rf level based on update type - gf_group->rf_level[frame_index] = - update_type_2_rf_level(gf_group->update_type[frame_index]); - } - - // NOTE: We need to configure the frame at the end of the sequence + 1 that - // will be the start frame for the next group. Otherwise prior to the - // call to av1_rc_get_second_pass_params() the data will be undefined. - if (rc->source_alt_ref_pending) { - gf_group->update_type[frame_index] = OVERLAY_UPDATE; - gf_group->rf_level[frame_index] = INTER_NORMAL; - } else { - gf_group->update_type[frame_index] = GF_UPDATE; - gf_group->rf_level[frame_index] = GF_ARF_STD; - } - - gf_group->bidir_pred_enabled[frame_index] = 0; - gf_group->brf_src_offset[frame_index] = 0; - gf_group->arf_update_idx[frame_index] = 0; - // This value is only used for INTNL_OVERLAY_UPDATE - gf_group->arf_pos_in_gf[frame_index] = 0; - - // This parameter is useless? - gf_group->arf_ref_idx[frame_index] = 0; -#ifdef CHCEK_GF_PARAMETER - check_frame_params(gf_group, rc->baseline_gf_interval, gf_update_frames); -#endif -} - -// It is an example of how to define a GF stucture manually. The function will -// result in exactly the same GF group structure as -// define_customized_gf_group_structure() when rc->baseline_gf_interval == 4 -#if USE_MANUAL_GF4_STRUCT -#define GF_INTERVAL_4 4 -static const unsigned char gf4_multi_layer_params[][GF_FRAME_PARAMS] = { - { - // gf_group->index == 0 (Frame 0) - // It can also be KEY frame. 
Will assign the proper value - // in define_gf_group_structure - OVERLAY_UPDATE, // update_type (default value) - 0, // arf_src_offset - 0, // arf_pos_in_gf - 0 // arf_update_idx - }, - { - // gf_group->index == 1 (Frame 4) - ARF_UPDATE, // update_type - GF_INTERVAL_4 - 1, // arf_src_offset - 0, // arf_pos_in_gf - 0 // arf_update_idx - }, - { - // gf_group->index == 2 (Frame 2) - INTNL_ARF_UPDATE, // update_type - (GF_INTERVAL_4 >> 1) - 1, // arf_src_offset - 0, // arf_pos_in_gf - 0 // arf_update_idx - }, - { - // gf_group->index == 3 (Frame 1) - LAST_BIPRED_UPDATE, // update_type - 0, // arf_src_offset - 0, // arf_pos_in_gf - 0 // arf_update_idx - }, - - { - // gf_group->index == 4 (Frame 2 - OVERLAY) - INTNL_OVERLAY_UPDATE, // update_type - 0, // arf_src_offset - 2, // arf_pos_in_gf - 0 // arf_update_idx - }, - { - // gf_group->index == 5 (Frame 3) - LF_UPDATE, // update_type - 0, // arf_src_offset - 0, // arf_pos_in_gf - 1 // arf_update_idx - } -}; - -static int define_gf_group_structure_4(AV1_COMP *cpi) { - RATE_CONTROL *const rc = &cpi->rc; - TWO_PASS *const twopass = &cpi->twopass; - GF_GROUP *const gf_group = &twopass->gf_group; - const int key_frame = cpi->common.frame_type == KEY_FRAME; - - assert(rc->baseline_gf_interval == GF_INTERVAL_4); - - const int gf_update_frames = rc->baseline_gf_interval + 2; - int frame_index; - - for (frame_index = 0; frame_index < gf_update_frames; ++frame_index) { - int param_idx = 0; - - gf_group->bidir_pred_enabled[frame_index] = 0; - - if (frame_index == 0) { - // gf_group->arf_src_offset[frame_index] = 0; - gf_group->brf_src_offset[frame_index] = 0; - gf_group->bidir_pred_enabled[frame_index] = 0; - - // For key frames the frame target rate is already set and it - // is also the golden frame. 
- if (key_frame) continue; - - gf_group->update_type[frame_index] = - gf4_multi_layer_params[frame_index][param_idx++]; - - if (rc->source_alt_ref_active) { - gf_group->update_type[frame_index] = OVERLAY_UPDATE; - } else { - gf_group->update_type[frame_index] = GF_UPDATE; - } - param_idx++; - } else { - gf_group->update_type[frame_index] = - gf4_multi_layer_params[frame_index][param_idx++]; - } - - // setup other parameters - gf_group->rf_level[frame_index] = - update_type_2_rf_level(gf_group->update_type[frame_index]); - - // == arf_src_offset == - gf_group->arf_src_offset[frame_index] = - gf4_multi_layer_params[frame_index][param_idx++]; - - // == arf_pos_in_gf == - gf_group->arf_pos_in_gf[frame_index] = - gf4_multi_layer_params[frame_index][param_idx++]; - - // == arf_update_idx == - gf_group->brf_src_offset[frame_index] = - gf4_multi_layer_params[frame_index][param_idx]; - } - - // NOTE: We need to configure the frame at the end of the sequence + 1 that - // will be the start frame for the next group. Otherwise prior to the - // call to av1_rc_get_second_pass_params() the data will be undefined. 
- gf_group->arf_update_idx[frame_index] = 0; - gf_group->arf_ref_idx[frame_index] = 0; - - if (rc->source_alt_ref_pending) { - gf_group->update_type[frame_index] = OVERLAY_UPDATE; - gf_group->rf_level[frame_index] = INTER_NORMAL; - - } else { - gf_group->update_type[frame_index] = GF_UPDATE; - gf_group->rf_level[frame_index] = GF_ARF_STD; - } - - gf_group->bidir_pred_enabled[frame_index] = 0; - gf_group->brf_src_offset[frame_index] = 0; - - // This value is only used for INTNL_OVERLAY_UPDATE - gf_group->arf_pos_in_gf[frame_index] = 0; - - return gf_update_frames; -} -#endif // USE_MANUAL_GF4_STRUCT -#endif // USE_SYMM_MULTI_LAYER - -static void define_gf_group_structure(AV1_COMP *cpi) { - RATE_CONTROL *const rc = &cpi->rc; - -#if USE_SYMM_MULTI_LAYER - const int valid_customized_gf_length = - rc->baseline_gf_interval >= 4 && - rc->baseline_gf_interval <= MAX_PYRAMID_SIZE; - // used the new structure only if extra_arf is allowed - if (valid_customized_gf_length && rc->source_alt_ref_pending && - cpi->extra_arf_allowed > 0) { -#if USE_MANUAL_GF4_STRUCT - if (rc->baseline_gf_interval == 4) - define_gf_group_structure_4(cpi); - else -#endif - define_customized_gf_group_structure(cpi); - cpi->new_bwdref_update_rule = 1; - return; - } else { - cpi->new_bwdref_update_rule = 0; - } -#endif - - TWO_PASS *const twopass = &cpi->twopass; - GF_GROUP *const gf_group = &twopass->gf_group; - int i; - int frame_index = 0; - const int key_frame = cpi->common.frame_type == KEY_FRAME; - - // The use of bi-predictive frames are only enabled when following 3 - // conditions are met: - // (1) ALTREF is enabled; - // (2) The bi-predictive group interval is at least 2; and - // (3) The bi-predictive group interval is strictly smaller than the - // golden group interval. 
- const int is_bipred_enabled = - cpi->extra_arf_allowed && rc->source_alt_ref_pending && - rc->bipred_group_interval && - rc->bipred_group_interval <= - (rc->baseline_gf_interval - rc->source_alt_ref_pending); - int bipred_group_end = 0; - int bipred_frame_index = 0; - - const unsigned char ext_arf_interval = - (unsigned char)(rc->baseline_gf_interval / (cpi->num_extra_arfs + 1) - 1); - int which_arf = cpi->num_extra_arfs; - int subgroup_interval[MAX_EXT_ARFS + 1]; - int is_sg_bipred_enabled = is_bipred_enabled; - int accumulative_subgroup_interval = 0; - - // For key frames the frame target rate is already set and it - // is also the golden frame. - // === [frame_index == 0] === - if (!key_frame) { - if (rc->source_alt_ref_active) { - gf_group->update_type[frame_index] = OVERLAY_UPDATE; - gf_group->rf_level[frame_index] = INTER_NORMAL; - } else { - gf_group->update_type[frame_index] = GF_UPDATE; - gf_group->rf_level[frame_index] = GF_ARF_STD; - } - gf_group->arf_update_idx[frame_index] = 0; - gf_group->arf_ref_idx[frame_index] = 0; - } - - gf_group->bidir_pred_enabled[frame_index] = 0; - gf_group->brf_src_offset[frame_index] = 0; - - frame_index++; - - bipred_frame_index++; - - // === [frame_index == 1] === - if (rc->source_alt_ref_pending) { - gf_group->update_type[frame_index] = ARF_UPDATE; - gf_group->rf_level[frame_index] = GF_ARF_STD; - gf_group->arf_src_offset[frame_index] = - (unsigned char)(rc->baseline_gf_interval - 1); - - gf_group->arf_update_idx[frame_index] = 0; - gf_group->arf_ref_idx[frame_index] = 0; - - gf_group->bidir_pred_enabled[frame_index] = 0; - gf_group->brf_src_offset[frame_index] = 0; - // NOTE: "bidir_pred_frame_index" stays unchanged for ARF_UPDATE frames. - - // Work out the ARFs' positions in this gf group - // NOTE(weitinglin): ALT_REFs' are indexed inversely, but coded in display - // order (except for the original ARF). 
In the example of three ALT_REF's, - // We index ALTREF's as: KEY ----- ALT2 ----- ALT1 ----- ALT0 - // but code them in the following order: - // KEY-ALT0-ALT2 ----- OVERLAY2-ALT1 ----- OVERLAY1 ----- OVERLAY0 - // - // arf_pos_for_ovrly[]: Position for OVERLAY - // arf_pos_in_gf[]: Position for ALTREF - cpi->arf_pos_for_ovrly[0] = frame_index + cpi->num_extra_arfs + - gf_group->arf_src_offset[frame_index] + 1; - for (i = 0; i < cpi->num_extra_arfs; ++i) { - cpi->arf_pos_for_ovrly[i + 1] = - frame_index + (cpi->num_extra_arfs - i) * (ext_arf_interval + 2); - subgroup_interval[i] = cpi->arf_pos_for_ovrly[i] - - cpi->arf_pos_for_ovrly[i + 1] - (i == 0 ? 1 : 2); - } - subgroup_interval[cpi->num_extra_arfs] = - cpi->arf_pos_for_ovrly[cpi->num_extra_arfs] - frame_index - - (cpi->num_extra_arfs == 0 ? 1 : 2); - - ++frame_index; - - // Insert an extra ARF - // === [frame_index == 2] === - if (cpi->num_extra_arfs) { - gf_group->update_type[frame_index] = INTNL_ARF_UPDATE; - gf_group->rf_level[frame_index] = GF_ARF_LOW; - gf_group->arf_src_offset[frame_index] = ext_arf_interval; - - gf_group->arf_update_idx[frame_index] = which_arf; - gf_group->arf_ref_idx[frame_index] = 0; - ++frame_index; - } - accumulative_subgroup_interval += subgroup_interval[cpi->num_extra_arfs]; - } - - for (i = 0; i < rc->baseline_gf_interval - rc->source_alt_ref_pending; ++i) { - gf_group->arf_update_idx[frame_index] = which_arf; - gf_group->arf_ref_idx[frame_index] = which_arf; - - // If we are going to have ARFs, check whether we can have BWDREF in this - // subgroup, and further, whether we can have ARF subgroup which contains - // the BWDREF subgroup but contained within the GF group: - // - // GF group --> ARF subgroup --> BWDREF subgroup - if (rc->source_alt_ref_pending) { - is_sg_bipred_enabled = - is_bipred_enabled && - (subgroup_interval[which_arf] > rc->bipred_group_interval); - } - - // NOTE: BIDIR_PRED is only enabled when the length of the bi-predictive - // frame group interval is 
strictly smaller than that of the GOLDEN - // FRAME group interval. - // TODO(zoeliu): Currently BIDIR_PRED is only enabled when alt-ref is on. - if (is_sg_bipred_enabled && !bipred_group_end) { - const int cur_brf_src_offset = rc->bipred_group_interval - 1; - - if (bipred_frame_index == 1) { - // --- BRF_UPDATE --- - gf_group->update_type[frame_index] = BRF_UPDATE; - gf_group->rf_level[frame_index] = GF_ARF_LOW; - gf_group->brf_src_offset[frame_index] = cur_brf_src_offset; - } else if (bipred_frame_index == rc->bipred_group_interval) { - // --- LAST_BIPRED_UPDATE --- - gf_group->update_type[frame_index] = LAST_BIPRED_UPDATE; - gf_group->rf_level[frame_index] = INTER_NORMAL; - gf_group->brf_src_offset[frame_index] = 0; - - // Reset the bi-predictive frame index. - bipred_frame_index = 0; - } else { - // --- BIPRED_UPDATE --- - gf_group->update_type[frame_index] = BIPRED_UPDATE; - gf_group->rf_level[frame_index] = INTER_NORMAL; - gf_group->brf_src_offset[frame_index] = 0; - } - gf_group->bidir_pred_enabled[frame_index] = 1; - - bipred_frame_index++; - // Check whether the next bi-predictive frame group would entirely be - // included within the current golden frame group. - // In addition, we need to avoid coding a BRF right before an ARF. - if (bipred_frame_index == 1 && - (i + 2 + cur_brf_src_offset) >= accumulative_subgroup_interval) { - bipred_group_end = 1; - } - } else { - gf_group->update_type[frame_index] = LF_UPDATE; - gf_group->rf_level[frame_index] = INTER_NORMAL; - gf_group->bidir_pred_enabled[frame_index] = 0; - gf_group->brf_src_offset[frame_index] = 0; - } - - ++frame_index; - - // Check if we need to update the ARF. - if (is_sg_bipred_enabled && cpi->num_extra_arfs && which_arf > 0 && - frame_index > cpi->arf_pos_for_ovrly[which_arf]) { - --which_arf; - accumulative_subgroup_interval += subgroup_interval[which_arf] + 1; - - // Meet the new subgroup; Reset the bipred_group_end flag. 
- bipred_group_end = 0; - // Insert another extra ARF after the overlay frame - if (which_arf) { - gf_group->update_type[frame_index] = INTNL_ARF_UPDATE; - gf_group->rf_level[frame_index] = GF_ARF_LOW; - gf_group->arf_src_offset[frame_index] = ext_arf_interval; - - gf_group->arf_update_idx[frame_index] = which_arf; - gf_group->arf_ref_idx[frame_index] = 0; - ++frame_index; - } - } - } - - // NOTE: We need to configure the frame at the end of the sequence + 1 that - // will be the start frame for the next group. Otherwise prior to the - // call to av1_rc_get_second_pass_params() the data will be undefined. - gf_group->arf_update_idx[frame_index] = 0; - gf_group->arf_ref_idx[frame_index] = 0; - - if (rc->source_alt_ref_pending) { - gf_group->update_type[frame_index] = OVERLAY_UPDATE; - gf_group->rf_level[frame_index] = INTER_NORMAL; - - cpi->arf_pos_in_gf[0] = 1; - if (cpi->num_extra_arfs) { - // Overwrite the update_type for extra-ARF's corresponding internal - // OVERLAY's: Change from LF_UPDATE to INTNL_OVERLAY_UPDATE. - for (i = cpi->num_extra_arfs; i > 0; --i) { - cpi->arf_pos_in_gf[i] = - (i == cpi->num_extra_arfs ? 
2 : cpi->arf_pos_for_ovrly[i + 1] + 1); - - gf_group->update_type[cpi->arf_pos_for_ovrly[i]] = INTNL_OVERLAY_UPDATE; - gf_group->rf_level[cpi->arf_pos_for_ovrly[i]] = INTER_NORMAL; - } - } - } else { - gf_group->update_type[frame_index] = GF_UPDATE; - gf_group->rf_level[frame_index] = GF_ARF_STD; - } - - gf_group->bidir_pred_enabled[frame_index] = 0; - gf_group->brf_src_offset[frame_index] = 0; -} - -#if USE_SYMM_MULTI_LAYER -#define LEAF_REDUCTION_FACTOR 0.75f -#define LVL_3_BOOST_FACTOR 0.8f -#define LVL_2_BOOST_FACTOR 0.3f - -static float_t lvl_budget_factor[MAX_PYRAMID_LVL - 1][MAX_PYRAMID_LVL - 1] = { - { 1, 0, 0 }, - { LVL_3_BOOST_FACTOR, 0, 0 }, // Leaking budget works better - { LVL_3_BOOST_FACTOR, (1 - LVL_3_BOOST_FACTOR) * LVL_2_BOOST_FACTOR, - (1 - LVL_3_BOOST_FACTOR) * (1 - LVL_2_BOOST_FACTOR) } -}; -#endif // USE_SYMM_MULTI_LAYER -static void allocate_gf_group_bits(AV1_COMP *cpi, int64_t gf_group_bits, - double group_error, int gf_arf_bits) { - RATE_CONTROL *const rc = &cpi->rc; - const AV1EncoderConfig *const oxcf = &cpi->oxcf; - TWO_PASS *const twopass = &cpi->twopass; - GF_GROUP *const gf_group = &twopass->gf_group; - FIRSTPASS_STATS frame_stats; - int i; - int frame_index = 0; - int target_frame_size; - int key_frame; - const int max_bits = frame_max_bits(&cpi->rc, &cpi->oxcf); - int64_t total_group_bits = gf_group_bits; - double modified_err = 0.0; - double err_fraction; - int ext_arf_boost[MAX_EXT_ARFS]; - - define_gf_group_structure(cpi); - - av1_zero_array(ext_arf_boost, MAX_EXT_ARFS); - - key_frame = cpi->common.frame_type == KEY_FRAME; - - // For key frames the frame target rate is already set and it - // is also the golden frame. 
- // === [frame_index == 0] === - if (!key_frame) { - if (rc->source_alt_ref_active) - gf_group->bit_allocation[frame_index] = 0; - else - gf_group->bit_allocation[frame_index] = gf_arf_bits; - - // Step over the golden frame / overlay frame - if (EOF == input_stats(twopass, &frame_stats)) return; - } - - // Deduct the boost bits for arf (or gf if it is not a key frame) - // from the group total. - if (rc->source_alt_ref_pending || !key_frame) total_group_bits -= gf_arf_bits; - - frame_index++; - - // Store the bits to spend on the ARF if there is one. - // === [frame_index == 1] === - if (rc->source_alt_ref_pending) { - gf_group->bit_allocation[frame_index] = gf_arf_bits; - - ++frame_index; - - // Skip all the extra-ARF's right after ARF at the starting segment of - // the current GF group. - if (cpi->num_extra_arfs) { - while (gf_group->update_type[frame_index] == INTNL_ARF_UPDATE) - ++frame_index; - } - } - - // Allocate bits to the other frames in the group. - for (i = 0; i < rc->baseline_gf_interval - rc->source_alt_ref_pending; ++i) { - if (EOF == input_stats(twopass, &frame_stats)) break; - - modified_err = calculate_modified_err(cpi, twopass, oxcf, &frame_stats); - - if (group_error > 0) - err_fraction = modified_err / DOUBLE_DIVIDE_CHECK(group_error); - else - err_fraction = 0.0; - - target_frame_size = (int)((double)total_group_bits * err_fraction); - - target_frame_size = - clamp(target_frame_size, 0, AOMMIN(max_bits, (int)total_group_bits)); - - if (gf_group->update_type[frame_index] == BRF_UPDATE) { - // Boost up the allocated bits on BWDREF_FRAME - gf_group->bit_allocation[frame_index] = - target_frame_size + (target_frame_size >> 2); - } else if (gf_group->update_type[frame_index] == LAST_BIPRED_UPDATE) { - // Press down the allocated bits on LAST_BIPRED_UPDATE frames - gf_group->bit_allocation[frame_index] = - target_frame_size - (target_frame_size >> 1); - } else if (gf_group->update_type[frame_index] == BIPRED_UPDATE) { - // TODO(zoeliu): To 
investigate whether the allocated bits on - // BIPRED_UPDATE frames need to be further adjusted. - gf_group->bit_allocation[frame_index] = target_frame_size; -#if USE_SYMM_MULTI_LAYER - } else if (cpi->new_bwdref_update_rule && - gf_group->update_type[frame_index] == INTNL_OVERLAY_UPDATE) { - assert(gf_group->pyramid_height <= MAX_PYRAMID_LVL && - gf_group->pyramid_height >= 0 && - "non-valid height for a pyramid structure"); - - int arf_pos = gf_group->arf_pos_in_gf[frame_index]; - gf_group->bit_allocation[frame_index] = 0; - - gf_group->bit_allocation[arf_pos] = target_frame_size; -#if MULTI_LVL_BOOST_VBR_CQ - const int pyr_h = gf_group->pyramid_height - 2; - const int this_lvl = gf_group->pyramid_level[arf_pos]; - const int dist2top = gf_group->pyramid_height - 1 - this_lvl; - - const float_t budget = - LEAF_REDUCTION_FACTOR * gf_group->pyramid_lvl_nodes[0]; - const float_t lvl_boost = budget * lvl_budget_factor[pyr_h][dist2top] / - gf_group->pyramid_lvl_nodes[this_lvl]; - - gf_group->bit_allocation[arf_pos] += (int)(target_frame_size * lvl_boost); -#endif // MULTI_LVL_BOOST_VBR_CQ -#endif // USE_SYMM_MULTI_LAYER - } else { - assert(gf_group->update_type[frame_index] == LF_UPDATE || - gf_group->update_type[frame_index] == INTNL_OVERLAY_UPDATE); - gf_group->bit_allocation[frame_index] = target_frame_size; -#if MULTI_LVL_BOOST_VBR_CQ - if (cpi->new_bwdref_update_rule) { - gf_group->bit_allocation[frame_index] -= - (int)(target_frame_size * LEAF_REDUCTION_FACTOR); - } -#endif // MULTI_LVL_BOOST_VBR_CQ - } - - ++frame_index; - - // Skip all the extra-ARF's. 
- if (cpi->num_extra_arfs) { - while (gf_group->update_type[frame_index] == INTNL_ARF_UPDATE) - ++frame_index; - } - } - -#if USE_SYMM_MULTI_LAYER - if (cpi->new_bwdref_update_rule == 0 && rc->source_alt_ref_pending) { -#else - if (rc->source_alt_ref_pending) { -#endif - if (cpi->num_extra_arfs) { - // NOTE: For bit allocation, move the allocated bits associated with - // INTNL_OVERLAY_UPDATE to the corresponding INTNL_ARF_UPDATE. - // i > 0 for extra-ARF's and i == 0 for ARF: - // arf_pos_for_ovrly[i]: Position for INTNL_OVERLAY_UPDATE - // arf_pos_in_gf[i]: Position for INTNL_ARF_UPDATE - for (i = cpi->num_extra_arfs; i > 0; --i) { - assert(gf_group->update_type[cpi->arf_pos_for_ovrly[i]] == - INTNL_OVERLAY_UPDATE); - - // Encoder's choice: - // Set show_existing_frame == 1 for all extra-ARF's, and hence - // allocate zero bit for both all internal OVERLAY frames. - gf_group->bit_allocation[cpi->arf_pos_in_gf[i]] = - gf_group->bit_allocation[cpi->arf_pos_for_ovrly[i]]; - gf_group->bit_allocation[cpi->arf_pos_for_ovrly[i]] = 0; - } - } - } -} - -// Analyse and define a gf/arf group. 
-static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) { - AV1_COMMON *const cm = &cpi->common; - RATE_CONTROL *const rc = &cpi->rc; - AV1EncoderConfig *const oxcf = &cpi->oxcf; - TWO_PASS *const twopass = &cpi->twopass; - FIRSTPASS_STATS next_frame; - const FIRSTPASS_STATS *const start_pos = twopass->stats_in; - int i; - - double boost_score = 0.0; -#if !CONFIG_FIX_GF_LENGTH - double old_boost_score = 0.0; - double mv_ratio_accumulator_thresh; - int active_max_gf_interval; - int active_min_gf_interval; -#endif - double gf_group_err = 0.0; -#if GROUP_ADAPTIVE_MAXQ - double gf_group_raw_error = 0.0; -#endif - double gf_group_skip_pct = 0.0; - double gf_group_inactive_zone_rows = 0.0; - double gf_first_frame_err = 0.0; - double mod_frame_err = 0.0; - - double mv_ratio_accumulator = 0.0; - double decay_accumulator = 1.0; - double zero_motion_accumulator = 1.0; - - double loop_decay_rate = 1.00; - double last_loop_decay_rate = 1.00; - - double this_frame_mv_in_out = 0.0; - double mv_in_out_accumulator = 0.0; - double abs_mv_in_out_accumulator = 0.0; - - unsigned int allow_alt_ref = is_altref_enabled(cpi); - - int f_boost = 0; - int b_boost = 0; - int flash_detected; - int64_t gf_group_bits; - double gf_group_error_left; - int gf_arf_bits; - const int is_key_frame = frame_is_intra_only(cm); - const int arf_active_or_kf = is_key_frame || rc->source_alt_ref_active; - - cpi->extra_arf_allowed = 1; - - // Reset the GF group data structures unless this is a key - // frame in which case it will already have been done. - if (is_key_frame == 0) { - av1_zero(twopass->gf_group); - } - - aom_clear_system_state(); - av1_zero(next_frame); - - // Load stats for the current frame. - mod_frame_err = calculate_modified_err(cpi, twopass, oxcf, this_frame); - - // Note the error of the frame at the start of the group. This will be - // the GF frame error if we code a normal gf. 
- gf_first_frame_err = mod_frame_err; - - // If this is a key frame or the overlay from a previous arf then - // the error score / cost of this frame has already been accounted for. - if (arf_active_or_kf) { - gf_group_err -= gf_first_frame_err; -#if GROUP_ADAPTIVE_MAXQ - gf_group_raw_error -= this_frame->coded_error; -#endif - gf_group_skip_pct -= this_frame->intra_skip_pct; - gf_group_inactive_zone_rows -= this_frame->inactive_zone_rows; - } -#if !CONFIG_FIX_GF_LENGTH - // Motion breakout threshold for loop below depends on image size. - mv_ratio_accumulator_thresh = - (cpi->initial_height + cpi->initial_width) / 4.0; - // Set a maximum and minimum interval for the GF group. - // If the image appears almost completely static we can extend beyond this. - { - int int_max_q = (int)(av1_convert_qindex_to_q( - twopass->active_worst_quality, cpi->common.seq_params.bit_depth)); - int int_lbq = (int)(av1_convert_qindex_to_q( - rc->last_boosted_qindex, cpi->common.seq_params.bit_depth)); - - active_min_gf_interval = rc->min_gf_interval + AOMMIN(2, int_max_q / 200); - if (active_min_gf_interval > rc->max_gf_interval) - active_min_gf_interval = rc->max_gf_interval; - - // The value chosen depends on the active Q range. At low Q we have - // bits to spare and are better with a smaller interval and smaller boost. - // At high Q when there are few bits to spare we are better with a longer - // interval to spread the cost of the GF. 
- active_max_gf_interval = 12 + AOMMIN(4, (int_lbq / 6)); - - // We have: active_min_gf_interval <= rc->max_gf_interval - if (active_max_gf_interval < active_min_gf_interval) - active_max_gf_interval = active_min_gf_interval; - else if (active_max_gf_interval > rc->max_gf_interval) - active_max_gf_interval = rc->max_gf_interval; - } -#endif // !CONFIG_FIX_GF_LENGTH - double avg_sr_coded_error = 0; - double avg_raw_err_stdev = 0; - int non_zero_stdev_count = 0; - - i = 0; - while (i < rc->static_scene_max_gf_interval && i < rc->frames_to_key) { - ++i; - - // Accumulate error score of frames in this gf group. - mod_frame_err = calculate_modified_err(cpi, twopass, oxcf, this_frame); - gf_group_err += mod_frame_err; -#if GROUP_ADAPTIVE_MAXQ - gf_group_raw_error += this_frame->coded_error; -#endif - gf_group_skip_pct += this_frame->intra_skip_pct; - gf_group_inactive_zone_rows += this_frame->inactive_zone_rows; - - if (EOF == input_stats(twopass, &next_frame)) break; - - // Test for the case where there is a brief flash but the prediction - // quality back to an earlier frame is then restored. - flash_detected = detect_flash(twopass, 0); - - // Update the motion related elements to the boost calculation. - accumulate_frame_motion_stats( - &next_frame, &this_frame_mv_in_out, &mv_in_out_accumulator, - &abs_mv_in_out_accumulator, &mv_ratio_accumulator); - // sum up the metric values of current gf group - avg_sr_coded_error += next_frame.sr_coded_error; - if (fabs(next_frame.raw_error_stdev) > 0.000001) { - non_zero_stdev_count++; - avg_raw_err_stdev += next_frame.raw_error_stdev; - } - - // Accumulate the effect of prediction quality decay. - if (!flash_detected) { - last_loop_decay_rate = loop_decay_rate; - loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame); - - decay_accumulator = decay_accumulator * loop_decay_rate; - - // Monitor for static sections. 
- zero_motion_accumulator = AOMMIN( - zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame)); - - // Break clause to detect very still sections after motion. For example, - // a static image after a fade or other transition. - if (detect_transition_to_still(cpi, i, 5, loop_decay_rate, - last_loop_decay_rate)) { - allow_alt_ref = 0; - break; - } - } - - // Calculate a boost number for this frame. - boost_score += - decay_accumulator * - calc_frame_boost(cpi, &next_frame, this_frame_mv_in_out, GF_MAX_BOOST); -#if CONFIG_FIX_GF_LENGTH - if (i == (FIXED_GF_LENGTH + 1)) break; -#else - // Skip breaking condition for CONFIG_FIX_GF_LENGTH - // Break out conditions. - if ( - // Break at active_max_gf_interval unless almost totally static. - (i >= (active_max_gf_interval + arf_active_or_kf) && - zero_motion_accumulator < 0.995) || - ( - // Don't break out with a very short interval. - (i >= active_min_gf_interval + arf_active_or_kf) && - (!flash_detected) && - ((mv_ratio_accumulator > mv_ratio_accumulator_thresh) || - (abs_mv_in_out_accumulator > 3.0) || - (mv_in_out_accumulator < -2.0) || - ((boost_score - old_boost_score) < BOOST_BREAKOUT)))) { - // If GF group interval is < 12, we force it to be 8. Otherwise, - // if it is >= 12, we keep it as is. - // NOTE: 'i' is 1 more than the GF group interval candidate that is being - // checked. - if (i == (8 + 1) || i >= (12 + 1)) { - boost_score = old_boost_score; - break; - } - } - old_boost_score = boost_score; -#endif // CONFIG_FIX_GF_LENGTH - *this_frame = next_frame; - } - twopass->gf_zeromotion_pct = (int)(zero_motion_accumulator * 1000.0); - - // Was the group length constrained by the requirement for a new KF? - rc->constrained_gf_group = (i >= rc->frames_to_key) ? 1 : 0; - - const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE) ? 
cpi->initial_mbs - : cpi->common.MBs; - assert(num_mbs > 0); - if (i) avg_sr_coded_error /= i; - - if (non_zero_stdev_count) avg_raw_err_stdev /= non_zero_stdev_count; - - // Disable extra altrefs and backward refs for "still" gf group: - // zero_motion_accumulator: minimum percentage of (0,0) motion; - // avg_sr_coded_error: average of the SSE per pixel of each frame; - // avg_raw_err_stdev: average of the standard deviation of (0,0) - // motion error per block of each frame. - const int disable_bwd_extarf = - (zero_motion_accumulator > MIN_ZERO_MOTION && - avg_sr_coded_error / num_mbs < MAX_SR_CODED_ERROR && - avg_raw_err_stdev < MAX_RAW_ERR_VAR); - - if (disable_bwd_extarf) cpi->extra_arf_allowed = 0; - -#define REDUCE_GF_LENGTH_THRESH 4 -#define REDUCE_GF_LENGTH_TO_KEY_THRESH 9 -#define REDUCE_GF_LENGTH_BY 1 - int alt_offset = 0; -#if REDUCE_LAST_GF_LENGTH - // TODO(weitinglin): The length reduction stretagy is tweaking using AOM_Q - // mode, and hurting the performance of VBR mode. We need to investigate how - // to adjust GF length for other modes. 
- - int allow_gf_length_reduction = - cpi->oxcf.rc_mode == AOM_Q || cpi->extra_arf_allowed == 0; - - // We are going to have an alt ref, but we don't have do adjustment for - // lossless mode - if (allow_alt_ref && allow_gf_length_reduction && - (i < cpi->oxcf.lag_in_frames) && (i >= rc->min_gf_interval) && - !is_lossless_requested(&cpi->oxcf)) { - // adjust length of this gf group if one of the following condition met - // 1: only one overlay frame left and this gf is too long - // 2: next gf group is too short to have arf compared to the current gf - - // maximum length of next gf group - const int next_gf_len = rc->frames_to_key - i; - const int single_overlay_left = - next_gf_len == 0 && i > REDUCE_GF_LENGTH_THRESH; - // the next gf is probably going to have a ARF but it will be shorter than - // this gf - const int unbalanced_gf = - i > REDUCE_GF_LENGTH_TO_KEY_THRESH && - next_gf_len + 1 < REDUCE_GF_LENGTH_TO_KEY_THRESH && - next_gf_len + 1 >= rc->min_gf_interval; - - if (single_overlay_left || unbalanced_gf) { - // Note: Tried roll_back = DIVIDE_AND_ROUND(i, 8), but is does not work - // better in the current setting - const int roll_back = REDUCE_GF_LENGTH_BY; - alt_offset = -roll_back; - i -= roll_back; - } - } -#endif - - // Should we use the alternate reference frame. - if (allow_alt_ref && (i < cpi->oxcf.lag_in_frames) && - (i >= rc->min_gf_interval)) { - // Calculate the boost for alt ref. - rc->gfu_boost = - calc_arf_boost(cpi, alt_offset, (i - 1), (i - 1), &f_boost, &b_boost); - rc->source_alt_ref_pending = 1; - - // do not replace ARFs with overlay frames, and keep it as GOLDEN_REF - cpi->preserve_arf_as_gld = 1; - } else { - rc->gfu_boost = AOMMAX((int)boost_score, MIN_ARF_GF_BOOST); - rc->source_alt_ref_pending = 0; - cpi->preserve_arf_as_gld = 0; - } - - // Set the interval until the next gf. - // If forward keyframes are enabled, ensure the final gf group obeys the - // MIN_FWD_KF_INTERVAL. 
- if (cpi->oxcf.fwd_kf_enabled && - ((twopass->stats_in - i + rc->frames_to_key) < twopass->stats_in_end)) { - if (i == rc->frames_to_key) { - rc->baseline_gf_interval = i; - // if the last gf group will be smaller than MIN_FWD_KF_INTERVAL - } else if ((rc->frames_to_key - i < - AOMMAX(MIN_FWD_KF_INTERVAL, rc->min_gf_interval)) && - (rc->frames_to_key != i)) { - // if possible, merge the last two gf groups - if (rc->frames_to_key <= MAX_PYRAMID_SIZE) { - rc->baseline_gf_interval = rc->frames_to_key; - // if merging the last two gf groups creates a group that is too long, - // split them and force the last gf group to be the MIN_FWD_KF_INTERVAL - } else { - rc->baseline_gf_interval = rc->frames_to_key - MIN_FWD_KF_INTERVAL; - } - } else { - rc->baseline_gf_interval = - i - (is_key_frame || rc->source_alt_ref_pending); - } - } else { - rc->baseline_gf_interval = i - (is_key_frame || rc->source_alt_ref_pending); - } - -#if REDUCE_LAST_ALT_BOOST -#define LAST_ALR_BOOST_FACTOR 0.2f - rc->arf_boost_factor = 1.0; - if (rc->source_alt_ref_pending && !is_lossless_requested(&cpi->oxcf)) { - // Reduce the boost of altref in the last gf group - if (rc->frames_to_key - i == REDUCE_GF_LENGTH_BY || - rc->frames_to_key - i == 0) { - rc->arf_boost_factor = LAST_ALR_BOOST_FACTOR; - } - } -#endif - - if (!cpi->extra_arf_allowed) { - cpi->num_extra_arfs = 0; - } else { -#if USE_SYMM_MULTI_LAYER - if (rc->baseline_gf_interval == 4 && rc->source_alt_ref_pending) - cpi->num_extra_arfs = 1; - else - cpi->num_extra_arfs = get_number_of_extra_arfs( - rc->baseline_gf_interval, rc->source_alt_ref_pending); -#else - // Compute how many extra alt_refs we can have - cpi->num_extra_arfs = get_number_of_extra_arfs(rc->baseline_gf_interval, - rc->source_alt_ref_pending); -#endif // USE_SYMM_MULTI_LAYER - } - -#if !USE_SYMM_MULTI_LAYER - // Currently at maximum two extra ARFs' are allowed - assert(cpi->num_extra_arfs <= MAX_EXT_ARFS); -#endif - - rc->frames_till_gf_update_due = 
rc->baseline_gf_interval; - - rc->bipred_group_interval = BFG_INTERVAL; - // The minimum bi-predictive frame group interval is 2. - if (rc->bipred_group_interval < 2) rc->bipred_group_interval = 0; - - // Reset the file position. - reset_fpf_position(twopass, start_pos); - - // Calculate the bits to be allocated to the gf/arf group as a whole - gf_group_bits = calculate_total_gf_group_bits(cpi, gf_group_err); - -#if GROUP_ADAPTIVE_MAXQ - // Calculate an estimate of the maxq needed for the group. - // We are more agressive about correcting for sections - // where there could be significant overshoot than for easier - // sections where we do not wish to risk creating an overshoot - // of the allocated bit budget. - if ((cpi->oxcf.rc_mode != AOM_Q) && (rc->baseline_gf_interval > 1)) { - const int vbr_group_bits_per_frame = - (int)(gf_group_bits / rc->baseline_gf_interval); - const double group_av_err = gf_group_raw_error / rc->baseline_gf_interval; - const double group_av_skip_pct = - gf_group_skip_pct / rc->baseline_gf_interval; - const double group_av_inactive_zone = - ((gf_group_inactive_zone_rows * 2) / - (rc->baseline_gf_interval * (double)cm->mb_rows)); - - int tmp_q; - // rc factor is a weight factor that corrects for local rate control drift. 
- double rc_factor = 1.0; - if (rc->rate_error_estimate > 0) { - rc_factor = AOMMAX(RC_FACTOR_MIN, - (double)(100 - rc->rate_error_estimate) / 100.0); - } else { - rc_factor = AOMMIN(RC_FACTOR_MAX, - (double)(100 - rc->rate_error_estimate) / 100.0); - } - tmp_q = get_twopass_worst_quality( - cpi, group_av_err, (group_av_skip_pct + group_av_inactive_zone), - vbr_group_bits_per_frame, twopass->kfgroup_inter_fraction * rc_factor); - twopass->active_worst_quality = - AOMMAX(tmp_q, twopass->active_worst_quality >> 1); - } -#endif - - // Calculate the extra bits to be used for boosted frame(s) - gf_arf_bits = calculate_boost_bits(rc->baseline_gf_interval, rc->gfu_boost, - gf_group_bits); - - // Adjust KF group bits and error remaining. - twopass->kf_group_error_left -= (int64_t)gf_group_err; - - // If this is an arf update we want to remove the score for the overlay - // frame at the end which will usually be very cheap to code. - // The overlay frame has already, in effect, been coded so we want to spread - // the remaining bits among the other frames. - // For normal GFs remove the score for the GF itself unless this is - // also a key frame in which case it has already been accounted for. - if (rc->source_alt_ref_pending) { - gf_group_error_left = gf_group_err - mod_frame_err; - } else if (is_key_frame == 0) { - gf_group_error_left = gf_group_err - gf_first_frame_err; - } else { - gf_group_error_left = gf_group_err; - } - - // Allocate bits to each of the frames in the GF group. - allocate_gf_group_bits(cpi, gf_group_bits, gf_group_error_left, gf_arf_bits); - - // Reset the file position. - reset_fpf_position(twopass, start_pos); - - // Calculate a section intra ratio used in setting max loop filter. - if (cpi->common.frame_type != KEY_FRAME) { - twopass->section_intra_rating = calculate_section_intra_ratio( - start_pos, twopass->stats_in_end, rc->baseline_gf_interval); - } -} - -// Threshold for use of the lagging second reference frame. 
High second ref -// usage may point to a transient event like a flash or occlusion rather than -// a real scene cut. -#define SECOND_REF_USEAGE_THRESH 0.1 -// Minimum % intra coding observed in first pass (1.0 = 100%) -#define MIN_INTRA_LEVEL 0.25 -// Minimum ratio between the % of intra coding and inter coding in the first -// pass after discounting neutral blocks (discounting neutral blocks in this -// way helps catch scene cuts in clips with very flat areas or letter box -// format clips with image padding. -#define INTRA_VS_INTER_THRESH 2.0 -// Hard threshold where the first pass chooses intra for almost all blocks. -// In such a case even if the frame is not a scene cut coding a key frame -// may be a good option. -#define VERY_LOW_INTER_THRESH 0.05 -// Maximum threshold for the relative ratio of intra error score vs best -// inter error score. -#define KF_II_ERR_THRESHOLD 2.5 -// In real scene cuts there is almost always a sharp change in the intra -// or inter error score. -#define ERR_CHANGE_THRESHOLD 0.4 -// For real scene cuts we expect an improvment in the intra inter error -// ratio in the next frame. -#define II_IMPROVEMENT_THRESHOLD 3.5 -#define KF_II_MAX 128.0 - -static int test_candidate_kf(TWO_PASS *twopass, - const FIRSTPASS_STATS *last_frame, - const FIRSTPASS_STATS *this_frame, - const FIRSTPASS_STATS *next_frame) { - int is_viable_kf = 0; - double pcnt_intra = 1.0 - this_frame->pcnt_inter; - double modified_pcnt_inter = - this_frame->pcnt_inter - this_frame->pcnt_neutral; - - // Does the frame satisfy the primary criteria of a key frame? - // See above for an explanation of the test criteria. - // If so, then examine how well it predicts subsequent frames. 
- if ((this_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) && - (next_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) && - ((this_frame->pcnt_inter < VERY_LOW_INTER_THRESH) || - ((pcnt_intra > MIN_INTRA_LEVEL) && - (pcnt_intra > (INTRA_VS_INTER_THRESH * modified_pcnt_inter)) && - ((this_frame->intra_error / - DOUBLE_DIVIDE_CHECK(this_frame->coded_error)) < - KF_II_ERR_THRESHOLD) && - ((fabs(last_frame->coded_error - this_frame->coded_error) / - DOUBLE_DIVIDE_CHECK(this_frame->coded_error) > - ERR_CHANGE_THRESHOLD) || - (fabs(last_frame->intra_error - this_frame->intra_error) / - DOUBLE_DIVIDE_CHECK(this_frame->intra_error) > - ERR_CHANGE_THRESHOLD) || - ((next_frame->intra_error / - DOUBLE_DIVIDE_CHECK(next_frame->coded_error)) > - II_IMPROVEMENT_THRESHOLD))))) { - int i; - const FIRSTPASS_STATS *start_pos = twopass->stats_in; - FIRSTPASS_STATS local_next_frame = *next_frame; - double boost_score = 0.0; - double old_boost_score = 0.0; - double decay_accumulator = 1.0; - - // Examine how well the key frame predicts subsequent frames. - for (i = 0; i < 16; ++i) { - double next_iiratio = (BOOST_FACTOR * local_next_frame.intra_error / - DOUBLE_DIVIDE_CHECK(local_next_frame.coded_error)); - - if (next_iiratio > KF_II_MAX) next_iiratio = KF_II_MAX; - - // Cumulative effect of decay in prediction quality. - if (local_next_frame.pcnt_inter > 0.85) - decay_accumulator *= local_next_frame.pcnt_inter; - else - decay_accumulator *= (0.85 + local_next_frame.pcnt_inter) / 2.0; - - // Keep a running total. - boost_score += (decay_accumulator * next_iiratio); - - // Test various breakout clauses. 
- if ((local_next_frame.pcnt_inter < 0.05) || (next_iiratio < 1.5) || - (((local_next_frame.pcnt_inter - local_next_frame.pcnt_neutral) < - 0.20) && - (next_iiratio < 3.0)) || - ((boost_score - old_boost_score) < 3.0) || - (local_next_frame.intra_error < 200)) { - break; - } - - old_boost_score = boost_score; - - // Get the next frame details - if (EOF == input_stats(twopass, &local_next_frame)) break; - } - - // If there is tolerable prediction for at least the next 3 frames then - // break out else discard this potential key frame and move on - if (boost_score > 30.0 && (i > 3)) { - is_viable_kf = 1; - } else { - // Reset the file position - reset_fpf_position(twopass, start_pos); - - is_viable_kf = 0; - } - } - - return is_viable_kf; -} - -#define FRAMES_TO_CHECK_DECAY 8 - -static void find_next_key_frame(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) { - int i, j; - RATE_CONTROL *const rc = &cpi->rc; - TWO_PASS *const twopass = &cpi->twopass; - GF_GROUP *const gf_group = &twopass->gf_group; - const AV1EncoderConfig *const oxcf = &cpi->oxcf; - const FIRSTPASS_STATS first_frame = *this_frame; - const FIRSTPASS_STATS *const start_position = twopass->stats_in; - FIRSTPASS_STATS next_frame; - FIRSTPASS_STATS last_frame; - int kf_bits = 0; - int loop_decay_counter = 0; - double decay_accumulator = 1.0; - double av_decay_accumulator = 0.0; - double zero_motion_accumulator = 1.0; - double boost_score = 0.0; - double kf_mod_err = 0.0; - double kf_group_err = 0.0; - double recent_loop_decay[FRAMES_TO_CHECK_DECAY]; - - av1_zero(next_frame); - - cpi->common.frame_type = KEY_FRAME; - - // Reset the GF group data structures. - av1_zero(*gf_group); - - // Is this a forced key frame by interval. - rc->this_key_frame_forced = rc->next_key_frame_forced; - - // Clear the alt ref active flag and last group multi arf flags as they - // can never be set for a key frame. - rc->source_alt_ref_active = 0; - - // KF is always a GF so clear frames till next gf counter. 
- rc->frames_till_gf_update_due = 0; - - rc->frames_to_key = 1; - - twopass->kf_group_bits = 0; // Total bits available to kf group - twopass->kf_group_error_left = 0; // Group modified error score. - - kf_mod_err = calculate_modified_err(cpi, twopass, oxcf, this_frame); - - // Initialize the decay rates for the recent frames to check - for (j = 0; j < FRAMES_TO_CHECK_DECAY; ++j) recent_loop_decay[j] = 1.0; - - // Find the next keyframe. - i = 0; - while (twopass->stats_in < twopass->stats_in_end && - rc->frames_to_key < cpi->oxcf.key_freq) { - // Accumulate kf group error. - kf_group_err += calculate_modified_err(cpi, twopass, oxcf, this_frame); - - // Load the next frame's stats. - last_frame = *this_frame; - input_stats(twopass, this_frame); - - // Provided that we are not at the end of the file... - if (cpi->oxcf.auto_key && twopass->stats_in < twopass->stats_in_end) { - double loop_decay_rate; - - // Check for a scene cut. - if (test_candidate_kf(twopass, &last_frame, this_frame, - twopass->stats_in)) - break; - - // How fast is the prediction quality decaying? - loop_decay_rate = get_prediction_decay_rate(cpi, twopass->stats_in); - - // We want to know something about the recent past... rather than - // as used elsewhere where we are concerned with decay in prediction - // quality since the last GF or KF. - recent_loop_decay[i % FRAMES_TO_CHECK_DECAY] = loop_decay_rate; - decay_accumulator = 1.0; - for (j = 0; j < FRAMES_TO_CHECK_DECAY; ++j) - decay_accumulator *= recent_loop_decay[j]; - - // Special check for transition or high motion followed by a - // static scene. - if (detect_transition_to_still(cpi, i, cpi->oxcf.key_freq - i, - loop_decay_rate, decay_accumulator)) - break; - - // Step on to the next frame. - ++rc->frames_to_key; - - // If we don't have a real key frame within the next two - // key_freq intervals then break out of the loop. 
- if (rc->frames_to_key >= 2 * cpi->oxcf.key_freq) break; - } else { - ++rc->frames_to_key; - } - ++i; - } - - // If there is a max kf interval set by the user we must obey it. - // We already breakout of the loop above at 2x max. - // This code centers the extra kf if the actual natural interval - // is between 1x and 2x. - if (cpi->oxcf.auto_key && rc->frames_to_key > cpi->oxcf.key_freq) { - FIRSTPASS_STATS tmp_frame = first_frame; - - rc->frames_to_key /= 2; - - // Reset to the start of the group. - reset_fpf_position(twopass, start_position); - - kf_group_err = 0.0; - - // Rescan to get the correct error data for the forced kf group. - for (i = 0; i < rc->frames_to_key; ++i) { - kf_group_err += calculate_modified_err(cpi, twopass, oxcf, &tmp_frame); - input_stats(twopass, &tmp_frame); - } - rc->next_key_frame_forced = 1; - } else if (twopass->stats_in == twopass->stats_in_end || - rc->frames_to_key >= cpi->oxcf.key_freq) { - rc->next_key_frame_forced = 1; - } else { - rc->next_key_frame_forced = 0; - } - - // Special case for the last key frame of the file. - if (twopass->stats_in >= twopass->stats_in_end) { - // Accumulate kf group error. - kf_group_err += calculate_modified_err(cpi, twopass, oxcf, this_frame); - } - - // Calculate the number of bits that should be assigned to the kf group. - if (twopass->bits_left > 0 && twopass->modified_error_left > 0.0) { - // Maximum number of bits for a single normal frame (not key frame). - const int max_bits = frame_max_bits(rc, &cpi->oxcf); - - // Maximum number of bits allocated to the key frame group. - int64_t max_grp_bits; - - // Default allocation based on bits left and relative - // complexity of the section. - twopass->kf_group_bits = (int64_t)( - twopass->bits_left * (kf_group_err / twopass->modified_error_left)); - - // Clip based on maximum per frame rate defined by the user. 
- max_grp_bits = (int64_t)max_bits * (int64_t)rc->frames_to_key; - if (twopass->kf_group_bits > max_grp_bits) - twopass->kf_group_bits = max_grp_bits; - } else { - twopass->kf_group_bits = 0; - } - twopass->kf_group_bits = AOMMAX(0, twopass->kf_group_bits); - - // Reset the first pass file position. - reset_fpf_position(twopass, start_position); - - // Scan through the kf group collating various stats used to determine - // how many bits to spend on it. - decay_accumulator = 1.0; - boost_score = 0.0; - const double kf_max_boost = - cpi->oxcf.rc_mode == AOM_Q - ? AOMMIN(AOMMAX(rc->frames_to_key * 2.0, KF_MIN_FRAME_BOOST), - KF_MAX_FRAME_BOOST) - : KF_MAX_FRAME_BOOST; - for (i = 0; i < (rc->frames_to_key - 1); ++i) { - if (EOF == input_stats(twopass, &next_frame)) break; - - // Monitor for static sections. - zero_motion_accumulator = AOMMIN(zero_motion_accumulator, - get_zero_motion_factor(cpi, &next_frame)); - - // Not all frames in the group are necessarily used in calculating boost. - if ((i <= rc->max_gf_interval) || - ((i <= (rc->max_gf_interval * 4)) && (decay_accumulator > 0.5))) { - const double frame_boost = - calc_frame_boost(cpi, this_frame, 0, kf_max_boost); - - // How fast is prediction quality decaying. - if (!detect_flash(twopass, 0)) { - const double loop_decay_rate = - get_prediction_decay_rate(cpi, &next_frame); - decay_accumulator *= loop_decay_rate; - decay_accumulator = AOMMAX(decay_accumulator, MIN_DECAY_FACTOR); - av_decay_accumulator += decay_accumulator; - ++loop_decay_counter; - } - boost_score += (decay_accumulator * frame_boost); - } - } - if (loop_decay_counter > 0) - av_decay_accumulator /= (double)loop_decay_counter; - - reset_fpf_position(twopass, start_position); - - // Store the zero motion percentage - twopass->kf_zeromotion_pct = (int)(zero_motion_accumulator * 100.0); - - // Calculate a section intra ratio used in setting max loop filter. 
- twopass->section_intra_rating = calculate_section_intra_ratio( - start_position, twopass->stats_in_end, rc->frames_to_key); - - // Apply various clamps for min and max boost - rc->kf_boost = (int)(av_decay_accumulator * boost_score); - rc->kf_boost = AOMMAX(rc->kf_boost, (rc->frames_to_key * 3)); - rc->kf_boost = AOMMAX(rc->kf_boost, MIN_KF_BOOST); - - // Work out how many bits to allocate for the key frame itself. - kf_bits = calculate_boost_bits((rc->frames_to_key - 1), rc->kf_boost, - twopass->kf_group_bits); - // printf("kf boost = %d kf_bits = %d kf_zeromotion_pct = %d\n", rc->kf_boost, - // kf_bits, twopass->kf_zeromotion_pct); - - // Work out the fraction of the kf group bits reserved for the inter frames - // within the group after discounting the bits for the kf itself. - if (twopass->kf_group_bits) { - twopass->kfgroup_inter_fraction = - (double)(twopass->kf_group_bits - kf_bits) / - (double)twopass->kf_group_bits; - } else { - twopass->kfgroup_inter_fraction = 1.0; - } - - twopass->kf_group_bits -= kf_bits; - - // Save the bits to spend on the key frame. - gf_group->bit_allocation[0] = kf_bits; - gf_group->update_type[0] = KF_UPDATE; - gf_group->rf_level[0] = KF_STD; - - // Note the total error score of the kf group minus the key frame itself. - twopass->kf_group_error_left = (int)(kf_group_err - kf_mod_err); - - // Adjust the count of total modified error left. - // The count of bits left is adjusted elsewhere based on real coded frame - // sizes. - twopass->modified_error_left -= kf_group_err; -} - -// Define the reference buffers that will be updated post encode. -static void configure_buffer_updates(AV1_COMP *cpi) { - TWO_PASS *const twopass = &cpi->twopass; - - // NOTE(weitinglin): Should we define another function to take care of - // cpi->rc.is_$Source_Type to make this function as it is in the comment? 
- - cpi->rc.is_src_frame_alt_ref = 0; - cpi->rc.is_bwd_ref_frame = 0; - cpi->rc.is_last_bipred_frame = 0; - cpi->rc.is_bipred_frame = 0; - cpi->rc.is_src_frame_ext_arf = 0; - - switch (twopass->gf_group.update_type[twopass->gf_group.index]) { - case KF_UPDATE: - cpi->refresh_last_frame = 1; - cpi->refresh_golden_frame = 1; - cpi->refresh_bwd_ref_frame = 1; - cpi->refresh_alt2_ref_frame = 1; - cpi->refresh_alt_ref_frame = 1; - break; - - case LF_UPDATE: - cpi->refresh_last_frame = 1; - cpi->refresh_golden_frame = 0; - cpi->refresh_bwd_ref_frame = 0; - cpi->refresh_alt2_ref_frame = 0; - cpi->refresh_alt_ref_frame = 0; - break; - - case GF_UPDATE: - // TODO(zoeliu): To further investigate whether 'refresh_last_frame' is - // needed. - cpi->refresh_last_frame = 1; - cpi->refresh_golden_frame = 1; - cpi->refresh_bwd_ref_frame = 0; - cpi->refresh_alt2_ref_frame = 0; - cpi->refresh_alt_ref_frame = 0; - break; - - case OVERLAY_UPDATE: - cpi->refresh_last_frame = 0; - cpi->refresh_golden_frame = 1; - cpi->refresh_bwd_ref_frame = 0; - cpi->refresh_alt2_ref_frame = 0; - cpi->refresh_alt_ref_frame = 0; - - cpi->rc.is_src_frame_alt_ref = 1; - break; - - case ARF_UPDATE: - cpi->refresh_last_frame = 0; - cpi->refresh_golden_frame = 0; - // NOTE: BWDREF does not get updated along with ALTREF_FRAME. 
- cpi->refresh_bwd_ref_frame = 0; - cpi->refresh_alt2_ref_frame = 0; - cpi->refresh_alt_ref_frame = 1; - break; - - case BRF_UPDATE: - cpi->refresh_last_frame = 0; - cpi->refresh_golden_frame = 0; - cpi->refresh_bwd_ref_frame = 1; - cpi->refresh_alt2_ref_frame = 0; - cpi->refresh_alt_ref_frame = 0; - - cpi->rc.is_bwd_ref_frame = 1; - break; - - case LAST_BIPRED_UPDATE: - cpi->refresh_last_frame = 1; - cpi->refresh_golden_frame = 0; - cpi->refresh_bwd_ref_frame = 0; - cpi->refresh_alt2_ref_frame = 0; - cpi->refresh_alt_ref_frame = 0; - - cpi->rc.is_last_bipred_frame = 1; - break; - - case BIPRED_UPDATE: - cpi->refresh_last_frame = 1; - cpi->refresh_golden_frame = 0; - cpi->refresh_bwd_ref_frame = 0; - cpi->refresh_alt2_ref_frame = 0; - cpi->refresh_alt_ref_frame = 0; - - cpi->rc.is_bipred_frame = 1; - break; - - case INTNL_OVERLAY_UPDATE: - cpi->refresh_last_frame = 1; - cpi->refresh_golden_frame = 0; - cpi->refresh_bwd_ref_frame = 0; - cpi->refresh_alt2_ref_frame = 0; - cpi->refresh_alt_ref_frame = 0; - - cpi->rc.is_src_frame_alt_ref = 1; - cpi->rc.is_src_frame_ext_arf = 1; - break; - - case INTNL_ARF_UPDATE: - cpi->refresh_last_frame = 0; - cpi->refresh_golden_frame = 0; -#if USE_SYMM_MULTI_LAYER - if (cpi->new_bwdref_update_rule == 1) { - cpi->refresh_bwd_ref_frame = 1; - cpi->refresh_alt2_ref_frame = 0; - } else { -#endif - cpi->refresh_bwd_ref_frame = 0; - cpi->refresh_alt2_ref_frame = 1; -#if USE_SYMM_MULTI_LAYER - } -#endif - cpi->refresh_alt_ref_frame = 0; - break; - - default: assert(0); break; - } -} - -void av1_configure_buffer_updates_firstpass(AV1_COMP *cpi, - FRAME_UPDATE_TYPE update_type) { - RATE_CONTROL *rc = &cpi->rc; - - cpi->refresh_last_frame = 1; - cpi->refresh_golden_frame = 0; - cpi->refresh_bwd_ref_frame = 0; - cpi->refresh_alt2_ref_frame = 0; - cpi->refresh_alt_ref_frame = 0; - - rc->is_bwd_ref_frame = 0; - - switch (update_type) { - case ARF_UPDATE: - cpi->refresh_alt_ref_frame = 1; - cpi->refresh_last_frame = 0; - 
cpi->refresh_golden_frame = 0; - cpi->refresh_bwd_ref_frame = 0; - cpi->refresh_alt2_ref_frame = 0; - - rc->is_src_frame_alt_ref = 0; - break; - case INTNL_ARF_UPDATE: - cpi->refresh_alt2_ref_frame = 1; - cpi->refresh_last_frame = 0; - cpi->refresh_golden_frame = 0; - cpi->refresh_bwd_ref_frame = 0; - cpi->refresh_alt_ref_frame = 0; - rc->is_src_frame_alt_ref = 0; - rc->is_src_frame_ext_arf = 0; - - break; - case BIPRED_UPDATE: - cpi->refresh_bwd_ref_frame = 1; - cpi->refresh_last_frame = 0; - cpi->refresh_golden_frame = 0; - cpi->refresh_alt2_ref_frame = 0; - cpi->refresh_alt_ref_frame = 0; - - rc->is_bwd_ref_frame = 1; - break; - default: break; - } -} - -static int is_skippable_frame(const AV1_COMP *cpi) { - // If the current frame does not have non-zero motion vector detected in the - // first pass, and so do its previous and forward frames, then this frame - // can be skipped for partition check, and the partition size is assigned - // according to the variance - const TWO_PASS *const twopass = &cpi->twopass; - - return (!frame_is_intra_only(&cpi->common) && - twopass->stats_in - 2 > twopass->stats_in_start && - twopass->stats_in < twopass->stats_in_end && - (twopass->stats_in - 1)->pcnt_inter - - (twopass->stats_in - 1)->pcnt_motion == - 1 && - (twopass->stats_in - 2)->pcnt_inter - - (twopass->stats_in - 2)->pcnt_motion == - 1 && - twopass->stats_in->pcnt_inter - twopass->stats_in->pcnt_motion == 1); -} - -void av1_rc_get_second_pass_params(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - RATE_CONTROL *const rc = &cpi->rc; - TWO_PASS *const twopass = &cpi->twopass; - GF_GROUP *const gf_group = &twopass->gf_group; - int frames_left; - FIRSTPASS_STATS this_frame; - - int target_rate; - - frames_left = (int)(twopass->total_stats.count - cm->current_video_frame); - - if (!twopass->stats_in) return; - - // If this is an arf frame then we dont want to read the stats file or - // advance the input pointer as we already have what we need. 
- if (gf_group->update_type[gf_group->index] == ARF_UPDATE || - gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE) { - configure_buffer_updates(cpi); - target_rate = gf_group->bit_allocation[gf_group->index]; - target_rate = av1_rc_clamp_pframe_target_size(cpi, target_rate); - rc->base_frame_target = target_rate; - - if (cpi->no_show_kf) { - assert(gf_group->update_type[gf_group->index] == ARF_UPDATE); - cm->frame_type = KEY_FRAME; - } else { - cm->frame_type = INTER_FRAME; - } - - // Do the firstpass stats indicate that this frame is skippable for the - // partition search? - if (cpi->sf.allow_partition_search_skip && cpi->oxcf.pass == 2) { - cpi->partition_search_skippable_frame = is_skippable_frame(cpi); - } - - return; - } - - aom_clear_system_state(); - - if (cpi->oxcf.rc_mode == AOM_Q) { - twopass->active_worst_quality = cpi->oxcf.cq_level; - } else if (cm->current_video_frame == 0) { - // Special case code for first frame. - const int section_target_bandwidth = - (int)(twopass->bits_left / frames_left); - const double section_length = twopass->total_left_stats.count; - const double section_error = - twopass->total_left_stats.coded_error / section_length; - const double section_intra_skip = - twopass->total_left_stats.intra_skip_pct / section_length; - const double section_inactive_zone = - (twopass->total_left_stats.inactive_zone_rows * 2) / - ((double)cm->mb_rows * section_length); - const int tmp_q = get_twopass_worst_quality( - cpi, section_error, section_intra_skip + section_inactive_zone, - section_target_bandwidth, DEFAULT_GRP_WEIGHT); - - twopass->active_worst_quality = tmp_q; - twopass->baseline_active_worst_quality = tmp_q; - rc->ni_av_qi = tmp_q; - rc->last_q[INTER_FRAME] = tmp_q; - rc->avg_q = av1_convert_qindex_to_q(tmp_q, cm->seq_params.bit_depth); - rc->avg_frame_qindex[INTER_FRAME] = tmp_q; - rc->last_q[KEY_FRAME] = (tmp_q + cpi->oxcf.best_allowed_q) / 2; - rc->avg_frame_qindex[KEY_FRAME] = rc->last_q[KEY_FRAME]; - } - - 
av1_zero(this_frame); - if (EOF == input_stats(twopass, &this_frame)) return; - - // Set the frame content type flag. - if (this_frame.intra_skip_pct >= FC_ANIMATION_THRESH) - twopass->fr_content_type = FC_GRAPHICS_ANIMATION; - else - twopass->fr_content_type = FC_NORMAL; - - // Keyframe and section processing. - if (rc->frames_to_key == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY)) { - FIRSTPASS_STATS this_frame_copy; - this_frame_copy = this_frame; - // Define next KF group and assign bits to it. - find_next_key_frame(cpi, &this_frame); - this_frame = this_frame_copy; - } else { - cm->frame_type = INTER_FRAME; - } - - // Define a new GF/ARF group. (Should always enter here for key frames). - if (rc->frames_till_gf_update_due == 0) { - define_gf_group(cpi, &this_frame); - - rc->frames_till_gf_update_due = rc->baseline_gf_interval; - -#if ARF_STATS_OUTPUT - { - FILE *fpfile; - fpfile = fopen("arf.stt", "a"); - ++arf_count; - fprintf(fpfile, "%10d %10d %10d %10d %10d\n", cm->current_video_frame, - rc->frames_till_gf_update_due, rc->kf_boost, arf_count, - rc->gfu_boost); - - fclose(fpfile); - } -#endif - } - - configure_buffer_updates(cpi); - - // Do the firstpass stats indicate that this frame is skippable for the - // partition search? - if (cpi->sf.allow_partition_search_skip && cpi->oxcf.pass == 2) { - cpi->partition_search_skippable_frame = is_skippable_frame(cpi); - } - - target_rate = gf_group->bit_allocation[gf_group->index]; - - if (cpi->common.frame_type == KEY_FRAME) - target_rate = av1_rc_clamp_iframe_target_size(cpi, target_rate); - else - target_rate = av1_rc_clamp_pframe_target_size(cpi, target_rate); - - rc->base_frame_target = target_rate; - - { - const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE) - ? cpi->initial_mbs - : cpi->common.MBs; - // The multiplication by 256 reverses a scaling factor of (>> 8) - // applied when combining MB error values for the frame. 
- twopass->mb_av_energy = log((this_frame.intra_error / num_mbs) + 1.0); - twopass->frame_avg_haar_energy = - log((this_frame.frame_avg_wavelet_energy / num_mbs) + 1.0); - } - - // Update the total stats remaining structure. - subtract_stats(&twopass->total_left_stats, &this_frame); -} - -#define MINQ_ADJ_LIMIT 48 -#define MINQ_ADJ_LIMIT_CQ 20 -#define HIGH_UNDERSHOOT_RATIO 2 -void av1_twopass_postencode_update(AV1_COMP *cpi) { - TWO_PASS *const twopass = &cpi->twopass; - RATE_CONTROL *const rc = &cpi->rc; - const int bits_used = rc->base_frame_target; - - // VBR correction is done through rc->vbr_bits_off_target. Based on the - // sign of this value, a limited % adjustment is made to the target rate - // of subsequent frames, to try and push it back towards 0. This method - // is designed to prevent extreme behaviour at the end of a clip - // or group of frames. - rc->vbr_bits_off_target += rc->base_frame_target - rc->projected_frame_size; - twopass->bits_left = AOMMAX(twopass->bits_left - bits_used, 0); - - // Calculate the pct rc error. - if (rc->total_actual_bits) { - rc->rate_error_estimate = - (int)((rc->vbr_bits_off_target * 100) / rc->total_actual_bits); - rc->rate_error_estimate = clamp(rc->rate_error_estimate, -100, 100); - } else { - rc->rate_error_estimate = 0; - } - - if (cpi->common.frame_type != KEY_FRAME) { - twopass->kf_group_bits -= bits_used; - twopass->last_kfgroup_zeromotion_pct = twopass->kf_zeromotion_pct; - } - twopass->kf_group_bits = AOMMAX(twopass->kf_group_bits, 0); - - // If the rate control is drifting consider adjustment to min or maxq. - if ((cpi->oxcf.rc_mode != AOM_Q) && - (cpi->twopass.gf_zeromotion_pct < VLOW_MOTION_THRESHOLD) && - !cpi->rc.is_src_frame_alt_ref) { - const int maxq_adj_limit = - rc->worst_quality - twopass->active_worst_quality; - const int minq_adj_limit = - (cpi->oxcf.rc_mode == AOM_CQ ? MINQ_ADJ_LIMIT_CQ : MINQ_ADJ_LIMIT); - - // Undershoot. 
- if (rc->rate_error_estimate > cpi->oxcf.under_shoot_pct) { - --twopass->extend_maxq; - if (rc->rolling_target_bits >= rc->rolling_actual_bits) - ++twopass->extend_minq; - // Overshoot. - } else if (rc->rate_error_estimate < -cpi->oxcf.over_shoot_pct) { - --twopass->extend_minq; - if (rc->rolling_target_bits < rc->rolling_actual_bits) - ++twopass->extend_maxq; - } else { - // Adjustment for extreme local overshoot. - if (rc->projected_frame_size > (2 * rc->base_frame_target) && - rc->projected_frame_size > (2 * rc->avg_frame_bandwidth)) - ++twopass->extend_maxq; - - // Unwind undershoot or overshoot adjustment. - if (rc->rolling_target_bits < rc->rolling_actual_bits) - --twopass->extend_minq; - else if (rc->rolling_target_bits > rc->rolling_actual_bits) - --twopass->extend_maxq; - } - - twopass->extend_minq = clamp(twopass->extend_minq, 0, minq_adj_limit); - twopass->extend_maxq = clamp(twopass->extend_maxq, 0, maxq_adj_limit); - - // If there is a big and undexpected undershoot then feed the extra - // bits back in quickly. One situation where this may happen is if a - // frame is unexpectedly almost perfectly predicted by the ARF or GF - // but not very well predcited by the previous frame. - if (!frame_is_kf_gf_arf(cpi) && !cpi->rc.is_src_frame_alt_ref) { - int fast_extra_thresh = rc->base_frame_target / HIGH_UNDERSHOOT_RATIO; - if (rc->projected_frame_size < fast_extra_thresh) { - rc->vbr_bits_off_target_fast += - fast_extra_thresh - rc->projected_frame_size; - rc->vbr_bits_off_target_fast = - AOMMIN(rc->vbr_bits_off_target_fast, (4 * rc->avg_frame_bandwidth)); - - // Fast adaptation of minQ if necessary to use up the extra bits. 
- if (rc->avg_frame_bandwidth) { - twopass->extend_minq_fast = - (int)(rc->vbr_bits_off_target_fast * 8 / rc->avg_frame_bandwidth); - } - twopass->extend_minq_fast = AOMMIN( - twopass->extend_minq_fast, minq_adj_limit - twopass->extend_minq); - } else if (rc->vbr_bits_off_target_fast) { - twopass->extend_minq_fast = AOMMIN( - twopass->extend_minq_fast, minq_adj_limit - twopass->extend_minq); - } else { - twopass->extend_minq_fast = 0; - } - } - } -} diff --git a/third_party/aom/av1/encoder/firstpass.h b/third_party/aom/av1/encoder/firstpass.h deleted file mode 100644 index 4b7325ae2..000000000 --- a/third_party/aom/av1/encoder/firstpass.h +++ /dev/null @@ -1,208 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_ENCODER_FIRSTPASS_H_ -#define AOM_AV1_ENCODER_FIRSTPASS_H_ - -#include "av1/common/enums.h" -#include "av1/common/onyxc_int.h" -#include "av1/encoder/lookahead.h" -#include "av1/encoder/ratectrl.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#if CONFIG_FP_MB_STATS - -#define FPMB_DCINTRA_MASK 0x01 - -#define FPMB_MOTION_ZERO_MASK 0x02 -#define FPMB_MOTION_LEFT_MASK 0x04 -#define FPMB_MOTION_RIGHT_MASK 0x08 -#define FPMB_MOTION_UP_MASK 0x10 -#define FPMB_MOTION_DOWN_MASK 0x20 - -#define FPMB_ERROR_SMALL_MASK 0x40 -#define FPMB_ERROR_LARGE_MASK 0x80 -#define FPMB_ERROR_SMALL_TH 2000 -#define FPMB_ERROR_LARGE_TH 48000 - -typedef struct { - uint8_t *mb_stats_start; - uint8_t *mb_stats_end; -} FIRSTPASS_MB_STATS; -#endif - -// Length of the bi-predictive frame group (BFG) -// NOTE: Currently each BFG contains one backward ref (BWF) frame plus a certain -// number of bi-predictive frames. -#define BFG_INTERVAL 2 -// The maximum number of extra ALTREF's except ALTREF_FRAME -#define MAX_EXT_ARFS (REF_FRAMES - BWDREF_FRAME - 1) - -#define MIN_EXT_ARF_INTERVAL 4 - -#define MIN_ZERO_MOTION 0.95 -#define MAX_SR_CODED_ERROR 40 -#define MAX_RAW_ERR_VAR 2000 -#define MIN_MV_IN_OUT 0.4 - -#define VLOW_MOTION_THRESHOLD 950 - -typedef struct { - double frame; - double weight; - double intra_error; - double frame_avg_wavelet_energy; - double coded_error; - double sr_coded_error; - double pcnt_inter; - double pcnt_motion; - double pcnt_second_ref; - double pcnt_neutral; - double intra_skip_pct; - double inactive_zone_rows; // Image mask rows top and bottom. - double inactive_zone_cols; // Image mask columns at left and right edges. 
- double MVr; - double mvr_abs; - double MVc; - double mvc_abs; - double MVrv; - double MVcv; - double mv_in_out_count; - double new_mv_count; - double duration; - double count; - // standard deviation for (0, 0) motion prediction error - double raw_error_stdev; -} FIRSTPASS_STATS; - -typedef enum { - KF_UPDATE = 0, - LF_UPDATE = 1, - GF_UPDATE = 2, - ARF_UPDATE = 3, - OVERLAY_UPDATE = 4, - BRF_UPDATE = 5, // Backward Reference Frame - LAST_BIPRED_UPDATE = 6, // Last Bi-predictive Frame - BIPRED_UPDATE = 7, // Bi-predictive Frame, but not the last one - INTNL_OVERLAY_UPDATE = 8, // Internal Overlay Frame - INTNL_ARF_UPDATE = 9, // Internal Altref Frame (candidate for ALTREF2) - FRAME_UPDATE_TYPES = 10 -} FRAME_UPDATE_TYPE; - -#define FC_ANIMATION_THRESH 0.15 -typedef enum { - FC_NORMAL = 0, - FC_GRAPHICS_ANIMATION = 1, - FRAME_CONTENT_TYPES = 2 -} FRAME_CONTENT_TYPE; - -typedef struct { - unsigned char index; - RATE_FACTOR_LEVEL rf_level[(MAX_LAG_BUFFERS * 2) + 1]; - FRAME_UPDATE_TYPE update_type[(MAX_LAG_BUFFERS * 2) + 1]; - unsigned char arf_src_offset[(MAX_LAG_BUFFERS * 2) + 1]; - unsigned char arf_update_idx[(MAX_LAG_BUFFERS * 2) + 1]; - unsigned char arf_ref_idx[(MAX_LAG_BUFFERS * 2) + 1]; -#if USE_SYMM_MULTI_LAYER - unsigned char arf_pos_in_gf[(MAX_LAG_BUFFERS * 2) + 1]; - unsigned char pyramid_level[(MAX_LAG_BUFFERS * 2) + 1]; - unsigned char pyramid_height; - unsigned char pyramid_lvl_nodes[MAX_PYRAMID_LVL]; -#endif - unsigned char brf_src_offset[(MAX_LAG_BUFFERS * 2) + 1]; - unsigned char bidir_pred_enabled[(MAX_LAG_BUFFERS * 2) + 1]; - unsigned char ref_fb_idx_map[(MAX_LAG_BUFFERS * 2) + 1][REF_FRAMES]; - unsigned char refresh_idx[(MAX_LAG_BUFFERS * 2) + 1]; - unsigned char refresh_flag[(MAX_LAG_BUFFERS * 2) + 1]; - int bit_allocation[(MAX_LAG_BUFFERS * 2) + 1]; -} GF_GROUP; - -typedef struct { - unsigned int section_intra_rating; - FIRSTPASS_STATS total_stats; - FIRSTPASS_STATS this_frame_stats; - const FIRSTPASS_STATS *stats_in; - const FIRSTPASS_STATS 
*stats_in_start; - const FIRSTPASS_STATS *stats_in_end; - FIRSTPASS_STATS total_left_stats; - int first_pass_done; - int64_t bits_left; - double modified_error_min; - double modified_error_max; - double modified_error_left; - double mb_av_energy; - double frame_avg_haar_energy; - -#if CONFIG_FP_MB_STATS - uint8_t *frame_mb_stats_buf; - uint8_t *this_frame_mb_stats; - FIRSTPASS_MB_STATS firstpass_mb_stats; -#endif - // An indication of the content type of the current frame - FRAME_CONTENT_TYPE fr_content_type; - - // Projected total bits available for a key frame group of frames - int64_t kf_group_bits; - - // Error score of frames still to be coded in kf group - int64_t kf_group_error_left; - - // The fraction for a kf groups total bits allocated to the inter frames - double kfgroup_inter_fraction; - - int sr_update_lag; - - int kf_zeromotion_pct; - int last_kfgroup_zeromotion_pct; - int gf_zeromotion_pct; - int active_worst_quality; - int baseline_active_worst_quality; - int extend_minq; - int extend_maxq; - int extend_minq_fast; - - GF_GROUP gf_group; -} TWO_PASS; - -struct AV1_COMP; - -void av1_init_first_pass(struct AV1_COMP *cpi); -void av1_rc_get_first_pass_params(struct AV1_COMP *cpi); -void av1_first_pass(struct AV1_COMP *cpi, const struct lookahead_entry *source); -void av1_end_first_pass(struct AV1_COMP *cpi); - -void av1_init_second_pass(struct AV1_COMP *cpi); -void av1_rc_get_second_pass_params(struct AV1_COMP *cpi); -void av1_configure_buffer_updates_firstpass(struct AV1_COMP *cpi, - FRAME_UPDATE_TYPE update_type); - -// Post encode update of the rate control parameters for 2-pass -void av1_twopass_postencode_update(struct AV1_COMP *cpi); - -static INLINE int get_number_of_extra_arfs(int interval, int arf_pending) { - if (arf_pending && MAX_EXT_ARFS > 0) - return interval >= MIN_EXT_ARF_INTERVAL * (MAX_EXT_ARFS + 1) - ? MAX_EXT_ARFS - : interval >= MIN_EXT_ARF_INTERVAL * MAX_EXT_ARFS - ? 
MAX_EXT_ARFS - 1 - : 0; - else - return 0; -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_FIRSTPASS_H_ diff --git a/third_party/aom/av1/encoder/global_motion.c b/third_party/aom/av1/encoder/global_motion.c deleted file mode 100644 index e9f8b0bb4..000000000 --- a/third_party/aom/av1/encoder/global_motion.c +++ /dev/null @@ -1,298 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include -#include -#include -#include -#include - -#include "av1/encoder/global_motion.h" - -#include "av1/common/warped_motion.h" - -#include "av1/encoder/segmentation.h" -#include "av1/encoder/corner_detect.h" -#include "av1/encoder/corner_match.h" -#include "av1/encoder/ransac.h" - -#define MAX_CORNERS 4096 -#define MIN_INLIER_PROB 0.1 - -#define MIN_TRANS_THRESH (1 * GM_TRANS_DECODE_FACTOR) - -// Border over which to compute the global motion -#define ERRORADV_BORDER 0 - -static const double erroradv_tr[] = { 0.65, 0.60, 0.55 }; -static const double erroradv_prod_tr[] = { 20000, 18000, 16000 }; - -int is_enough_erroradvantage(double best_erroradvantage, int params_cost, - int erroradv_type) { - assert(erroradv_type < GM_ERRORADV_TR_TYPES); - return best_erroradvantage < erroradv_tr[erroradv_type] && - best_erroradvantage * params_cost < erroradv_prod_tr[erroradv_type]; -} - -static void convert_to_params(const double *params, int32_t *model) { - int i; - int alpha_present = 0; - model[0] = (int32_t)floor(params[0] * (1 << GM_TRANS_PREC_BITS) + 0.5); - model[1] = 
(int32_t)floor(params[1] * (1 << GM_TRANS_PREC_BITS) + 0.5); - model[0] = (int32_t)clamp(model[0], GM_TRANS_MIN, GM_TRANS_MAX) * - GM_TRANS_DECODE_FACTOR; - model[1] = (int32_t)clamp(model[1], GM_TRANS_MIN, GM_TRANS_MAX) * - GM_TRANS_DECODE_FACTOR; - - for (i = 2; i < 6; ++i) { - const int diag_value = ((i == 2 || i == 5) ? (1 << GM_ALPHA_PREC_BITS) : 0); - model[i] = (int32_t)floor(params[i] * (1 << GM_ALPHA_PREC_BITS) + 0.5); - model[i] = - (int32_t)clamp(model[i] - diag_value, GM_ALPHA_MIN, GM_ALPHA_MAX); - alpha_present |= (model[i] != 0); - model[i] = (model[i] + diag_value) * GM_ALPHA_DECODE_FACTOR; - } - for (; i < 8; ++i) { - model[i] = (int32_t)floor(params[i] * (1 << GM_ROW3HOMO_PREC_BITS) + 0.5); - model[i] = (int32_t)clamp(model[i], GM_ROW3HOMO_MIN, GM_ROW3HOMO_MAX) * - GM_ROW3HOMO_DECODE_FACTOR; - alpha_present |= (model[i] != 0); - } - - if (!alpha_present) { - if (abs(model[0]) < MIN_TRANS_THRESH && abs(model[1]) < MIN_TRANS_THRESH) { - model[0] = 0; - model[1] = 0; - } - } -} - -void convert_model_to_params(const double *params, WarpedMotionParams *model) { - convert_to_params(params, model->wmmat); - model->wmtype = get_gmtype(model); - model->invalid = 0; -} - -// Adds some offset to a global motion parameter and handles -// all of the necessary precision shifts, clamping, and -// zero-centering. -static int32_t add_param_offset(int param_index, int32_t param_value, - int32_t offset) { - const int scale_vals[3] = { GM_TRANS_PREC_DIFF, GM_ALPHA_PREC_DIFF, - GM_ROW3HOMO_PREC_DIFF }; - const int clamp_vals[3] = { GM_TRANS_MAX, GM_ALPHA_MAX, GM_ROW3HOMO_MAX }; - // type of param: 0 - translation, 1 - affine, 2 - homography - const int param_type = (param_index < 2 ? 0 : (param_index < 6 ? 
1 : 2)); - const int is_one_centered = (param_index == 2 || param_index == 5); - - // Make parameter zero-centered and offset the shift that was done to make - // it compatible with the warped model - param_value = (param_value - (is_one_centered << WARPEDMODEL_PREC_BITS)) >> - scale_vals[param_type]; - // Add desired offset to the rescaled/zero-centered parameter - param_value += offset; - // Clamp the parameter so it does not overflow the number of bits allotted - // to it in the bitstream - param_value = (int32_t)clamp(param_value, -clamp_vals[param_type], - clamp_vals[param_type]); - // Rescale the parameter to WARPEDMODEL_PRECISION_BITS so it is compatible - // with the warped motion library - param_value *= (1 << scale_vals[param_type]); - - // Undo the zero-centering step if necessary - return param_value + (is_one_centered << WARPEDMODEL_PREC_BITS); -} - -static void force_wmtype(WarpedMotionParams *wm, TransformationType wmtype) { - switch (wmtype) { - case IDENTITY: - wm->wmmat[0] = 0; - wm->wmmat[1] = 0; - AOM_FALLTHROUGH_INTENDED; - case TRANSLATION: - wm->wmmat[2] = 1 << WARPEDMODEL_PREC_BITS; - wm->wmmat[3] = 0; - AOM_FALLTHROUGH_INTENDED; - case ROTZOOM: - wm->wmmat[4] = -wm->wmmat[3]; - wm->wmmat[5] = wm->wmmat[2]; - AOM_FALLTHROUGH_INTENDED; - case AFFINE: wm->wmmat[6] = wm->wmmat[7] = 0; break; - default: assert(0); - } - wm->wmtype = wmtype; -} - -int64_t refine_integerized_param(WarpedMotionParams *wm, - TransformationType wmtype, int use_hbd, int bd, - uint8_t *ref, int r_width, int r_height, - int r_stride, uint8_t *dst, int d_width, - int d_height, int d_stride, int n_refinements, - int64_t best_frame_error) { - static const int max_trans_model_params[TRANS_TYPES] = { 0, 2, 4, 6 }; - const int border = ERRORADV_BORDER; - int i = 0, p; - int n_params = max_trans_model_params[wmtype]; - int32_t *param_mat = wm->wmmat; - int64_t step_error, best_error; - int32_t step; - int32_t *param; - int32_t curr_param; - int32_t best_param; - - 
force_wmtype(wm, wmtype); - best_error = av1_warp_error(wm, use_hbd, bd, ref, r_width, r_height, r_stride, - dst + border * d_stride + border, border, border, - d_width - 2 * border, d_height - 2 * border, - d_stride, 0, 0, best_frame_error); - best_error = AOMMIN(best_error, best_frame_error); - step = 1 << (n_refinements - 1); - for (i = 0; i < n_refinements; i++, step >>= 1) { - for (p = 0; p < n_params; ++p) { - int step_dir = 0; - // Skip searches for parameters that are forced to be 0 - param = param_mat + p; - curr_param = *param; - best_param = curr_param; - // look to the left - *param = add_param_offset(p, curr_param, -step); - step_error = - av1_warp_error(wm, use_hbd, bd, ref, r_width, r_height, r_stride, - dst + border * d_stride + border, border, border, - d_width - 2 * border, d_height - 2 * border, d_stride, - 0, 0, best_error); - if (step_error < best_error) { - best_error = step_error; - best_param = *param; - step_dir = -1; - } - - // look to the right - *param = add_param_offset(p, curr_param, step); - step_error = - av1_warp_error(wm, use_hbd, bd, ref, r_width, r_height, r_stride, - dst + border * d_stride + border, border, border, - d_width - 2 * border, d_height - 2 * border, d_stride, - 0, 0, best_error); - if (step_error < best_error) { - best_error = step_error; - best_param = *param; - step_dir = 1; - } - *param = best_param; - - // look to the direction chosen above repeatedly until error increases - // for the biggest step size - while (step_dir) { - *param = add_param_offset(p, best_param, step * step_dir); - step_error = - av1_warp_error(wm, use_hbd, bd, ref, r_width, r_height, r_stride, - dst + border * d_stride + border, border, border, - d_width - 2 * border, d_height - 2 * border, - d_stride, 0, 0, best_error); - if (step_error < best_error) { - best_error = step_error; - best_param = *param; - } else { - *param = best_param; - step_dir = 0; - } - } - } - } - force_wmtype(wm, wmtype); - wm->wmtype = get_gmtype(wm); - return 
best_error; -} - -static INLINE RansacFunc get_ransac_type(TransformationType type) { - switch (type) { - case AFFINE: return ransac_affine; - case ROTZOOM: return ransac_rotzoom; - case TRANSLATION: return ransac_translation; - default: assert(0); return NULL; - } -} - -static unsigned char *downconvert_frame(YV12_BUFFER_CONFIG *frm, - int bit_depth) { - int i, j; - uint16_t *orig_buf = CONVERT_TO_SHORTPTR(frm->y_buffer); - uint8_t *buf_8bit = frm->y_buffer_8bit; - assert(buf_8bit); - if (!frm->buf_8bit_valid) { - for (i = 0; i < frm->y_height; ++i) { - for (j = 0; j < frm->y_width; ++j) { - buf_8bit[i * frm->y_stride + j] = - orig_buf[i * frm->y_stride + j] >> (bit_depth - 8); - } - } - frm->buf_8bit_valid = 1; - } - return buf_8bit; -} - -int compute_global_motion_feature_based(TransformationType type, - YV12_BUFFER_CONFIG *frm, - YV12_BUFFER_CONFIG *ref, int bit_depth, - int *num_inliers_by_motion, - double *params_by_motion, - int num_motions) { - int i; - int num_frm_corners, num_ref_corners; - int num_correspondences; - int *correspondences; - int frm_corners[2 * MAX_CORNERS], ref_corners[2 * MAX_CORNERS]; - unsigned char *frm_buffer = frm->y_buffer; - unsigned char *ref_buffer = ref->y_buffer; - RansacFunc ransac = get_ransac_type(type); - - if (frm->flags & YV12_FLAG_HIGHBITDEPTH) { - // The frame buffer is 16-bit, so we need to convert to 8 bits for the - // following code. We cache the result until the frame is released. 
- frm_buffer = downconvert_frame(frm, bit_depth); - } - if (ref->flags & YV12_FLAG_HIGHBITDEPTH) { - ref_buffer = downconvert_frame(ref, bit_depth); - } - - // compute interest points in images using FAST features - num_frm_corners = fast_corner_detect(frm_buffer, frm->y_width, frm->y_height, - frm->y_stride, frm_corners, MAX_CORNERS); - num_ref_corners = fast_corner_detect(ref_buffer, ref->y_width, ref->y_height, - ref->y_stride, ref_corners, MAX_CORNERS); - - // find correspondences between the two images - correspondences = - (int *)malloc(num_frm_corners * 4 * sizeof(*correspondences)); - num_correspondences = determine_correspondence( - frm_buffer, (int *)frm_corners, num_frm_corners, ref_buffer, - (int *)ref_corners, num_ref_corners, frm->y_width, frm->y_height, - frm->y_stride, ref->y_stride, correspondences); - - ransac(correspondences, num_correspondences, num_inliers_by_motion, - params_by_motion, num_motions); - - free(correspondences); - - // Set num_inliers = 0 for motions with too few inliers so they are ignored. - for (i = 0; i < num_motions; ++i) { - if (num_inliers_by_motion[i] < MIN_INLIER_PROB * num_correspondences) { - num_inliers_by_motion[i] = 0; - } - } - - // Return true if any one of the motions has inliers. - for (i = 0; i < num_motions; ++i) { - if (num_inliers_by_motion[i] > 0) return 1; - } - return 0; -} diff --git a/third_party/aom/av1/encoder/global_motion.h b/third_party/aom/av1/encoder/global_motion.h deleted file mode 100644 index c7c016c43..000000000 --- a/third_party/aom/av1/encoder/global_motion.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_ENCODER_GLOBAL_MOTION_H_ -#define AOM_AV1_ENCODER_GLOBAL_MOTION_H_ - -#include "aom/aom_integer.h" -#include "aom_scale/yv12config.h" -#include "av1/common/mv.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define RANSAC_NUM_MOTIONS 1 - -void convert_model_to_params(const double *params, WarpedMotionParams *model); - -int is_enough_erroradvantage(double best_erroradvantage, int params_cost, - int erroradv_type); - -// Returns the av1_warp_error between "dst" and the result of applying the -// motion params that result from fine-tuning "wm" to "ref". Note that "wm" is -// modified in place. -int64_t refine_integerized_param(WarpedMotionParams *wm, - TransformationType wmtype, int use_hbd, int bd, - uint8_t *ref, int r_width, int r_height, - int r_stride, uint8_t *dst, int d_width, - int d_height, int d_stride, int n_refinements, - int64_t best_frame_error); - -/* - Computes "num_motions" candidate global motion parameters between two frames. - The array "params_by_motion" should be length 8 * "num_motions". The ordering - of each set of parameters is best described by the homography: - - [x' (m2 m3 m0 [x - z . y' = m4 m5 m1 * y - 1] m6 m7 1) 1] - - where m{i} represents the ith value in any given set of parameters. - - "num_inliers" should be length "num_motions", and will be populated with the - number of inlier feature points for each motion. Params for which the - num_inliers entry is 0 should be ignored by the caller. 
-*/ -int compute_global_motion_feature_based(TransformationType type, - YV12_BUFFER_CONFIG *frm, - YV12_BUFFER_CONFIG *ref, int bit_depth, - int *num_inliers_by_motion, - double *params_by_motion, - int num_motions); -#ifdef __cplusplus -} // extern "C" -#endif -#endif // AOM_AV1_ENCODER_GLOBAL_MOTION_H_ diff --git a/third_party/aom/av1/encoder/grain_test_vectors.h b/third_party/aom/av1/encoder/grain_test_vectors.h deleted file mode 100644 index 945dc3733..000000000 --- a/third_party/aom/av1/encoder/grain_test_vectors.h +++ /dev/null @@ -1,781 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ -#ifndef AOM_AV1_ENCODER_GRAIN_TEST_VECTORS_H_ -#define AOM_AV1_ENCODER_GRAIN_TEST_VECTORS_H_ - -/* Test vectors for emulation of different film grain types. - * Note that bit depth would be derived from the bitstream and - * not signaled in film grain metadata. The parameters are valid - * for any bit depth. 
- */ -static aom_film_grain_t film_grain_test_vectors[16] = { - /* Test 1 */ - { - 1 /* apply_grain */, - 1 /* update_parameters */, - { { 16, 0 }, - { 25, 136 }, - { 33, 144 }, - { 41, 160 }, - { 48, 168 }, - { 56, 136 }, - { 67, 128 }, - { 82, 144 }, - { 97, 152 }, - { 113, 144 }, - { 128, 176 }, - { 143, 168 }, - { 158, 176 }, - { 178, 184 } }, - 14 /* num_points_y */, - { { 16, 0 }, - { 20, 64 }, - { 28, 88 }, - { 60, 104 }, - { 90, 136 }, - { 105, 160 }, - { 134, 168 }, - { 168, 208 } }, - 8 /* num_cb_points */, - { { 16, 0 }, - { 28, 96 }, - { 56, 80 }, - { 66, 96 }, - { 80, 104 }, - { 108, 96 }, - { 122, 112 }, - { 137, 112 }, - { 169, 176 } }, - 9 /* num_cr_points */, - 11 /* scaling_shift */, - 2 /* ar_coeff_lag */, - { 0, 0, -58, 0, 0, 0, -76, 100, -43, 0, -51, 82 }, - { 0, 0, -49, 0, 0, 0, -36, 22, -30, 0, -38, 7, 39 }, - { 0, 0, -47, 0, 0, 0, -31, 31, -25, 0, -32, 13, -100 }, - 8 /* ar_coeff_shift */, - 247 /* cb_mult */, - 192 /* cb_luma_mult */, - 18 /* cb_offset */, - 229 /* cr_mult */, - 192 /* cr_luma_mult */, - 54 /* cr_offset */, - 0 /* overlap_flag */, - 1 /* clip_to_restricted_range */, - 8 /* bit_depth */, - 0 /* chroma_scaling_from_luma*/, - 0 /* grain_scale_shift*/, - 45231 /* random_seed */ - }, - /* Test 2 */ - { - 1 /* apply_grain */, - 1 /* update_parameters */, - { { 0, 96 }, { 255, 96 } }, - 2 /* num_points_y */, - { { 0, 64 }, { 255, 64 } }, - 2 /* num_cb_points */, - { { 0, 64 }, { 255, 64 } }, - 2 /* num_cr_points */, - 11 /* scaling_shift */, - 3 /* ar_coeff_lag */, - { - 4, 1, 3, 0, 1, -3, 8, -3, 7, -23, 1, -25, - 0, -10, 6, -17, -4, 53, 36, 5, -5, -17, 8, 66, - }, - { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, - }, - { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, - }, - 7 /* ar_coeff_shift */, - 128 /* cb_mult */, - 192 /* cb_luma_mult */, - 256 /* cb_offset */, - 128 /* cr_mult */, - 192 /* cr_luma_mult */, - 256 /* cr_offset */, - 1 /* overlap_flag */, - 
0 /* clip_to_restricted_range */, - 8 /* bit_depth */, - 0 /*chroma_scaling_from_luma*/, - 0 /* grain_scale_shift*/, - 45231 /* random_seed */ - }, - /* Test 3 */ - { - 1 /* apply_grain */, - 1 /* update_parameters */, - { { 0, 192 }, { 255, 192 } }, - 2 /* num_points_y */, - { { 0, 128 }, { 255, 128 } }, - 2 /* num_cb_points */, - { { 0, 128 }, { 255, 128 } }, - 2 /* num_cr_points */, - 11 /* scaling_shift */, - 3 /* ar_coeff_lag */, - { - 4, 1, 3, 0, 1, -3, 8, -3, 7, -23, 1, -25, - 0, -10, 6, -17, -4, 53, 36, 5, -5, -17, 8, 66, - }, - { - 4, -7, 2, 4, 12, -12, 5, -8, 6, 8, -19, -16, 19, - -10, -2, 17, -42, 58, -2, -13, 9, 14, -36, 67, 0, - }, - { - 4, -7, 2, 4, 12, -12, 5, -8, 6, 8, -19, -16, 19, - -10, -2, 17, -42, 58, -2, -13, 9, 14, -36, 67, 0, - }, - 7 /* ar_coeff_shift */, - 128 /* cb_mult */, - 192 /* cb_luma_mult */, - 256 /* cb_offset */, - 128 /* cr_mult */, - 192 /* cr_luma_mult */, - 256 /* cr_offset */, - 1 /* overlap_flag */, - 1 /* clip_to_restricted_range */, - 8 /* bit_depth */, - 0 /*chroma_scaling_from_luma*/, - 1 /* grain_scale_shift*/, - 45231 /* random_seed */ - }, - /* Test 4 */ - { - 1 /* apply_grain */, - 1 /* update_parameters */, - { - { 16, 0 }, - { 24, 137 }, - { 53, 146 }, - { 63, 155 }, - { 78, 155 }, - { 107, 150 }, - { 122, 147 }, - { 136, 147 }, - { 166, 153 }, - }, - 9 /* num_points_y */, - { - { 16, 0 }, - { 20, 72 }, - { 27, 82 }, - { 33, 91 }, - { 69, 121 }, - { 95, 143 }, - { 108, 154 }, - { 134, 169 }, - { 147, 177 }, - }, - 9 /* num_cb_points */, - { - { 16, 0 }, - { 24, 95 }, - { 54, 93 }, - { 65, 94 }, - { 79, 98 }, - { 109, 107 }, - { 124, 119 }, - { 139, 136 }, - { 169, 170 }, - }, - 9 /* num_cr_points */, - 11 /* scaling_shift */, - 3 /* ar_coeff_lag */, - { - 7, -9, 2, 4, 7, -12, 7, -18, 18, -30, -27, -42, - 13, -20, 7, -18, 6, 107, 55, -2, -4, -9, -22, 113, - }, - { - -3, -1, -4, 3, -6, -2, 3, 1, -4, -10, -10, -5, -5, - -3, -1, -13, -28, -25, -31, -6, -4, 14, -64, 66, 0, - }, - { - 0, 4, -3, 13, 0, 1, -3, 0, -3, -10, 
-68, -4, -2, - -5, 2, -3, -20, 62, -31, 0, -4, -1, -8, -29, 0, - }, - 8 /* ar_coeff_shift */, - 128 /* cb_mult */, - 192 /* cb_luma_mult */, - 256 /* cb_offset */, - 128 /* cr_mult */, - 192 /* cr_luma_mult */, - 256 /* cr_offset */, - 1 /* overlap_flag */, - 0 /* clip_to_restricted_range */, - 8 /* bit_depth */, - 0 /*chroma_scaling_from_luma*/, - 0 /* grain_scale_shift*/, - 45231 /* random_seed */ - }, - /* Test 5 */ - { - 1 /* apply_grain */, - 0 /* update_parameters */, - { { 0, 64 }, { 255, 64 } }, - 2 /* num_points_y */, - { - { 0, 96 }, - { 32, 90 }, - { 64, 83 }, - { 96, 76 }, - { 128, 68 }, - { 159, 59 }, - { 191, 48 }, - { 223, 34 }, - { 255, 0 }, - }, - 9 /* num_cb_points */, - { - { 0, 0 }, - { 32, 34 }, - { 64, 48 }, - { 96, 59 }, - { 128, 68 }, - { 159, 76 }, - { 191, 83 }, - { 223, 90 }, - { 255, 96 }, - }, - 9 /* num_cr_points */, - 11 /* scaling_shift */, - 3 /* ar_coeff_lag */, - { - 4, 1, 3, 0, 1, -3, 8, -3, 7, -23, 1, -25, - 0, -10, 6, -17, -4, 53, 36, 5, -5, -17, 8, 66, - }, - { - -2, 2, -5, 7, -6, 4, -2, -1, 1, -2, 0, -2, 2, - -3, -5, 13, -13, 6, -14, 8, -1, 18, -36, 58, 0, - }, - { - -2, -1, -3, 14, -4, -1, -3, 0, -1, 7, -31, 7, 2, - 0, 1, 0, -7, 50, -8, -2, 2, 2, 2, -4, 0, - }, - 7 /* ar_coeff_shift */, - 128 /* cb_mult */, - 192 /* cb_luma_mult */, - 256 /* cb_offset */, - 128 /* cr_mult */, - 192 /* cr_luma_mult */, - 256 /* cr_offset */, - 1 /* overlap_flag */, - 1 /* clip_to_restricted_range */, - 8 /* bit_depth */, - 0 /*chroma_scaling_from_luma*/, - 0 /* grain_scale_shift*/, - 1063 /* random_seed */ - }, - /* Test 6 */ - { - 1 /* apply_grain */, - 1 /* update_parameters */, - { - { 0, 96 }, - { 20, 92 }, - { 39, 88 }, - { 59, 84 }, - { 78, 80 }, - { 98, 75 }, - { 118, 70 }, - { 137, 65 }, - { 157, 60 }, - { 177, 53 }, - { 196, 46 }, - { 216, 38 }, - { 235, 27 }, - { 255, 0 }, - }, - 14 /* num_points_y */, - { { 0, 0 } }, - 0 /* num_cb_points */, - { { 0, 0 } }, - 0 /* num_cr_points */, - 11 /* scaling_shift */, - 3 /* ar_coeff_lag */, 
- { - 4, 1, 3, 0, 1, -3, 8, -3, 7, -23, 1, -25, - 0, -10, 6, -17, -4, 53, 36, 5, -5, -17, 8, 66, - }, - { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, - { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, - 7 /* ar_coeff_shift */, - 128 /* cb_mult */, - 192 /* cb_luma_mult */, - 256 /* cb_offset */, - 128 /* cr_mult */, - 192 /* cr_luma_mult */, - 256 /* cr_offset */, - 1 /* overlap_flag */, - 1 /* clip_to_restricted_range */, - 8 /* bit_depth */, - 0 /*chroma_scaling_from_luma*/, - 0 /* grain_scale_shift*/, - 2754 /* random_seed */ - }, - /* Test 7 */ - { - 1 /* apply_grain */, - 1 /* update_parameters */, - { - { 0, 0 }, - { 20, 27 }, - { 39, 38 }, - { 59, 46 }, - { 78, 53 }, - { 98, 60 }, - { 118, 65 }, - { 137, 70 }, - { 157, 75 }, - { 177, 80 }, - { 196, 84 }, - { 216, 88 }, - { 235, 92 }, - { 255, 96 }, - }, - 14 /* num_points_y */, - { { 0, 0 }, { 255, 0 } }, - 2 /* num_cb_points */, - { { 0, 0 }, { 255, 0 } }, - 2 /* num_cr_points */, - 11 /* scaling_shift */, - 3 /* ar_coeff_lag */, - { - 4, 1, 3, 0, 1, -3, 8, -3, 7, -23, 1, -25, - 0, -10, 6, -17, -4, 53, 36, 5, -5, -17, 8, 66, - }, - { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, - { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, - 7 /* ar_coeff_shift */, - 128 /* cb_mult */, - 192 /* cb_luma_mult */, - 256 /* cb_offset */, - 128 /* cr_mult */, - 192 /* cr_luma_mult */, - 256 /* cr_offset */, - 1 /* overlap_flag */, - 1 /* clip_to_restricted_range */, - 8 /* bit_depth */, - 0 /*chroma_scaling_from_luma*/, - 0 /* grain_scale_shift*/, - 45231 /* random_seed */ - }, - /* Test 8 */ - { - 1 /* apply_grain */, - 1 /* update_parameters */, - { { 0, 96 }, { 255, 96 } }, - 2 /* num_points_y */, - { { 0, 62 }, { 255, 62 } }, - 2 /* num_cb_points */, - { { 0, 62 }, { 255, 62 } }, - 2 /* num_cr_points */, - 11 /* scaling_shift */, - 3 /* ar_coeff_lag */, - { - 4, 1, 3, 
0, 1, -3, 8, -3, 7, -23, 1, -25, - 0, -10, 6, -17, -4, 53, 36, 5, -5, -17, 8, 66, - }, - { - 0, -2, -2, 8, 5, -1, 1, -1, 5, 16, -33, -9, 6, - -1, -3, 10, -47, 63, 0, -15, 3, 11, -42, 75, -69, - }, - { - 1, -1, -1, 9, 5, 0, 1, -1, 5, 15, -32, -10, 8, - -2, -4, 11, -46, 62, 1, -16, 3, 13, -43, 75, -55, - }, - 7 /* ar_coeff_shift */, - 128 /* cb_mult */, - 192 /* cb_luma_mult */, - 256 /* cb_offset */, - 128 /* cr_mult */, - 192 /* cr_luma_mult */, - 256 /* cr_offset */, - 1 /* overlap_flag */, - 0 /* clip_to_restricted_range */, - 8 /* bit_depth */, - 0 /*chroma_scaling_from_luma*/, - 0 /* grain_scale_shift*/, - 45231 /* random_seed */ - }, - /* Test 9 */ - { - 1 /* apply_grain */, - 0 /* update_parameters */, - { { 0, 48 }, { 255, 48 } }, - 2 /* num_points_y */, - { { 0, 32 }, { 255, 32 } }, - 2 /* num_cb_points */, - { { 0, 32 }, { 255, 32 } }, - 2 /* num_cr_points */, - 10 /* scaling_shift */, - 2 /* ar_coeff_lag */, - { 10, -30, -20, -39, 1, -24, 12, 103, 60, -9, -24, 113 }, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127 }, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127 }, - 8 /* ar_coeff_shift */, - 128 /* cb_mult */, - 192 /* cb_luma_mult */, - 256 /* cb_offset */, - 128 /* cr_mult */, - 192 /* cr_luma_mult */, - 256 /* cr_offset */, - 1 /* overlap_flag */, - 0 /* clip_to_restricted_range */, - 8 /* bit_depth */, - 0 /*chroma_scaling_from_luma*/, - 0 /* grain_scale_shift*/, - 45231 /* random_seed */ - }, - /* Test 10 */ - { - 1 /* apply_grain */, - 1 /* update_parameters */, - { { 0, 48 }, { 255, 48 } }, - 2 /* num_points_y */, - { { 0, 32 }, { 255, 32 } }, - 2 /* num_cb_points */, - { { 0, 32 }, { 255, 32 } }, - 2 /* num_cr_points */, - 10 /* scaling_shift */, - 2 /* ar_coeff_lag */, - { 10, -30, -20, -39, 1, -24, 12, 103, 60, -9, -24, 113 }, - { -7, -6, -48, -22, 2, -3, -45, 73, -11, -26, -52, 76, 0 }, - { -7, -6, -48, -22, 2, -3, -45, 73, -11, -26, -52, 76, 0 }, - 8 /* ar_coeff_shift */, - 128 /* cb_mult */, - 192 /* cb_luma_mult */, - 256 /* cb_offset */, - 128 
/* cr_mult */, - 192 /* cr_luma_mult */, - 256 /* cr_offset */, - 1 /* overlap_flag */, - 0 /* clip_to_restricted_range */, - 8 /* bit_depth */, - 0 /*chroma_scaling_from_luma*/, - 0 /* grain_scale_shift*/, - 45231 /* random_seed */ - }, - /* Test 11 */ - { - 1 /* apply_grain */, - 0 /* update_parameters */, - { { 0, 32 }, { 255, 32 } }, - 2 /* num_points_y */, - { - { 0, 48 }, - { 32, 45 }, - { 64, 42 }, - { 96, 38 }, - { 128, 34 }, - { 159, 29 }, - { 191, 24 }, - { 223, 17 }, - { 255, 0 }, - }, - 9 /* num_cb_points */, - { - { 0, 0 }, - { 32, 17 }, - { 64, 24 }, - { 96, 29 }, - { 128, 34 }, - { 159, 38 }, - { 191, 42 }, - { 223, 45 }, - { 255, 48 }, - }, - 9 /* num_cr_points */, - 10 /* scaling_shift */, - 3 /* ar_coeff_lag */, - { - 7, -9, 2, 4, 7, -12, 7, -18, 18, -30, -27, -42, - 13, -20, 7, -18, 6, 107, 55, -2, -4, -9, -22, 113, - }, - { - -3, -1, -4, 3, -6, -2, 3, 1, -4, -10, -10, -5, -5, - -3, -1, -13, -28, -25, -31, -6, -4, 14, -64, 66, 0, - }, - { - 0, 4, -3, 13, 0, 1, -3, 0, -3, -10, -68, -4, -2, - -5, 2, -3, -20, 62, -31, 0, -4, -1, -8, -29, 0, - }, - 8 /* ar_coeff_shift */, - 128 /* cb_mult */, - 192 /* cb_luma_mult */, - 256 /* cb_offset */, - 128 /* cr_mult */, - 192 /* cr_luma_mult */, - 256 /* cr_offset */, - 1 /* overlap_flag */, - 1 /* clip_to_restricted_range */, - 8 /* bit_depth */, - 0 /*chroma_scaling_from_luma*/, - 0 /* grain_scale_shift*/, - 1357 /* random_seed */ - }, - /* Test 12 */ - { - 1 /* apply_grain */, - 1 /* update_parameters */, - { - { 16, 0 }, - { 24, 49 }, - { 39, 69 }, - { 46, 84 }, - { 53, 91 }, - { 63, 100 }, - { 78, 114 }, - { 92, 134 }, - { 164, 139 }, - }, - 9 /* num_points_y */, - { - { 16, 0 }, - { 20, 31 }, - { 26, 42 }, - { 33, 54 }, - { 40, 65 }, - { 47, 72 }, - { 56, 85 }, - { 84, 123 }, - { 152, 157 }, - }, - 9 /* num_cb_points */, - { - { 16, 0 }, - { 25, 14 }, - { 39, 33 }, - { 47, 40 }, - { 54, 47 }, - { 64, 62 }, - { 79, 76 }, - { 94, 83 }, - { 167, 101 }, - }, - 9 /* num_cr_points */, - 10 /* scaling_shift 
*/, - 2 /* ar_coeff_lag */, - { 0, 0, -58, 0, 0, 0, -76, 100, -43, 0, -51, 82 }, - { 0, 0, -49, 0, 0, 0, -36, 22, -30, 0, -38, 7, 39 }, - { 0, 0, -47, 0, 0, 0, -31, 31, -25, 0, -32, 13, -100 }, - 8 /* ar_coeff_shift */, - 128 /* cb_mult */, - 192 /* cb_luma_mult */, - 256 /* cb_offset */, - 128 /* cr_mult */, - 192 /* cr_luma_mult */, - 256 /* cr_offset */, - 0 /* overlap_flag */, - 0 /* clip_to_restricted_range */, - 8 /* bit_depth */, - 0 /*chroma_scaling_from_luma*/, - 0 /* grain_scale_shift*/, - 45231 /* random_seed */ - }, - /* Test 13 */ - { - 1 /* apply_grain */, - 1 /* update_parameters */, - { - { 0, 48 }, - { 20, 46 }, - { 39, 44 }, - { 59, 42 }, - { 78, 40 }, - { 98, 38 }, - { 118, 35 }, - { 137, 33 }, - { 157, 30 }, - { 177, 27 }, - { 196, 23 }, - { 216, 19 }, - { 235, 13 }, - { 255, 0 }, - }, - 14 /* num_points_y */, - { { 0, 0 }, { 255, 0 } }, - 0 /* num_cb_points */, - { { 0, 0 }, { 255, 0 } }, - 0 /* num_cr_points */, - 10 /* scaling_shift */, - 2 /* ar_coeff_lag */, - { 10, -30, -20, -39, 1, -24, 12, 103, 60, -9, -24, 113 }, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, - 8 /* ar_coeff_shift */, - 128 /* cb_mult */, - 192 /* cb_luma_mult */, - 256 /* cb_offset */, - 128 /* cr_mult */, - 192 /* cr_luma_mult */, - 256 /* cr_offset */, - 1 /* overlap_flag */, - 0 /* clip_to_restricted_range */, - 8 /* bit_depth */, - 0 /*chroma_scaling_from_luma*/, - 0 /* grain_scale_shift*/, - 45231 /* random_seed */ - }, - /* Test 14 */ - { - 1 /* apply_grain */, - 1 /* update_parameters */, - { - { 0, 0 }, - { 20, 13 }, - { 39, 19 }, - { 59, 23 }, - { 78, 27 }, - { 98, 30 }, - { 118, 33 }, - { 137, 35 }, - { 157, 38 }, - { 177, 40 }, - { 196, 42 }, - { 216, 44 }, - { 235, 46 }, - { 255, 48 }, - }, - 14 /* num_points_y */, - { { 0, 0 }, { 255, 0 } }, - 0 /* num_cb_points */, - { { 0, 0 }, { 255, 0 } }, - 0 /* num_cr_points */, - 10 /* scaling_shift */, - 2 /* ar_coeff_lag */, - { 10, -30, -20, -39, 1, -24, 12, 103, 60, -9, 
-24, 113 }, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, - 8 /* ar_coeff_shift */, - 128 /* cb_mult */, - 192 /* cb_luma_mult */, - 256 /* cb_offset */, - 128 /* cr_mult */, - 192 /* cr_luma_mult */, - 256 /* cr_offset */, - 1 /* overlap_flag */, - 1 /* clip_to_restricted_range */, - 8 /* bit_depth */, - 0 /*chroma_scaling_from_luma*/, - 0 /* grain_scale_shift*/, - 45231 /* random_seed */ - }, - /* Test 15 */ - { - 1 /* apply_grain */, - 1 /* update_parameters */, - { { 0, 96 }, { 255, 96 } }, - 1 /* num_points_y */, - { { 0, 96 }, { 255, 96 } }, - 0 /* num_cb_points */, - { { 0, 96 }, { 255, 96 } }, - 0 /* num_cr_points */, - 11 /* scaling_shift */, - 2 /* ar_coeff_lag */, - { 5, -15, -10, -19, 0, -12, 6, 51, 30, -5, -12, 56 }, - { 2, 2, -24, -5, 1, 1, -18, 37, -2, 0, -15, 39, -70 }, - { 2, 3, -24, -5, -1, 0, -18, 38, -2, 0, -15, 39, -55 }, - 7 /* ar_coeff_shift */, - 128 /* cb_mult */, - 192 /* cb_luma_mult */, - 256 /* cb_offset */, - 128 /* cr_mult */, - 192 /* cr_luma_mult */, - 256 /* cr_offset */, - 1 /* overlap_flag */, - 0 /* clip_to_restricted_range */, - 8 /* bit_depth */, - 1 /*chroma_scaling_from_luma*/, - 0 /* grain_scale_shift*/, - 45231 /* random_seed */ - }, - /* Test 16 */ - { - 1 /* apply_grain */, - 1 /* update_parameters */, - { - { 16, 0 }, - { 58, 126 }, - { 87, 120 }, - { 97, 122 }, - { 112, 125 }, - { 126, 131 }, - { 141, 139 }, - { 199, 153 }, - }, - 8 /* num_points_y */, - { - { 16, 0 }, - { 59, 68 }, - { 66, 76 }, - { 73, 82 }, - { 79, 85 }, - { 86, 86 }, - { 151, 95 }, - { 192, 101 }, - }, - 8 /* num_cb_points */, - { - { 16, 0 }, - { 59, 64 }, - { 89, 80 }, - { 99, 86 }, - { 114, 90 }, - { 129, 93 }, - { 144, 97 }, - { 203, 85 }, - }, - 8 /* num_cr_points */, - 10 /* scaling_shift */, - 3 /* ar_coeff_lag */, - { - 4, 1, 3, 0, 1, -3, 8, -3, 7, -23, 1, -25, - 0, -10, 6, -17, -4, 53, 36, 5, -5, -17, 8, 66, - }, - { - 0, -2, -2, 8, 5, -1, 1, -1, 5, 16, -33, -9, 6, - -1, -3, 10, -47, 63, 0, -15, 
3, 11, -42, 75, -69, - }, - { - 1, -1, -1, 9, 5, 0, 1, -1, 5, 15, -32, -10, 8, - -2, -4, 11, -46, 62, 1, -16, 3, 13, -43, 75, -55, - }, - 7 /* ar_coeff_shift */, - 128 /* cb_mult */, - 192 /* cb_luma_mult */, - 256 /* cb_offset */, - 128 /* cr_mult */, - 192 /* cr_luma_mult */, - 256 /* cr_offset */, - 1 /* overlap_flag */, - 0 /* clip_to_restricted_range */, - 8 /* bit_depth */, - 0 /*chroma_scaling_from_luma*/, - 2 /* grain_scale_shift*/, - 45231 /* random_seed */ - }, -}; -#endif // AOM_AV1_ENCODER_GRAIN_TEST_VECTORS_H_ diff --git a/third_party/aom/av1/encoder/hash.c b/third_party/aom/av1/encoder/hash.c deleted file mode 100644 index 180115d9f..000000000 --- a/third_party/aom/av1/encoder/hash.c +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include "av1/encoder/hash.h" - -static void crc_calculator_process_data(CRC_CALCULATOR *p_crc_calculator, - uint8_t *pData, uint32_t dataLength) { - for (uint32_t i = 0; i < dataLength; i++) { - const uint8_t index = - (p_crc_calculator->remainder >> (p_crc_calculator->bits - 8)) ^ - pData[i]; - p_crc_calculator->remainder <<= 8; - p_crc_calculator->remainder ^= p_crc_calculator->table[index]; - } -} - -static void crc_calculator_reset(CRC_CALCULATOR *p_crc_calculator) { - p_crc_calculator->remainder = 0; -} - -static uint32_t crc_calculator_get_crc(CRC_CALCULATOR *p_crc_calculator) { - return p_crc_calculator->remainder & p_crc_calculator->final_result_mask; -} - -static void crc_calculator_init_table(CRC_CALCULATOR *p_crc_calculator) { - const uint32_t high_bit = 1 << (p_crc_calculator->bits - 1); - const uint32_t byte_high_bit = 1 << (8 - 1); - - for (uint32_t value = 0; value < 256; value++) { - uint32_t remainder = 0; - for (uint8_t mask = byte_high_bit; mask != 0; mask >>= 1) { - if (value & mask) { - remainder ^= high_bit; - } - - if (remainder & high_bit) { - remainder <<= 1; - remainder ^= p_crc_calculator->trunc_poly; - } else { - remainder <<= 1; - } - } - p_crc_calculator->table[value] = remainder; - } -} - -void av1_crc_calculator_init(CRC_CALCULATOR *p_crc_calculator, uint32_t bits, - uint32_t truncPoly) { - p_crc_calculator->remainder = 0; - p_crc_calculator->bits = bits; - p_crc_calculator->trunc_poly = truncPoly; - p_crc_calculator->final_result_mask = (1 << bits) - 1; - crc_calculator_init_table(p_crc_calculator); -} - -uint32_t av1_get_crc_value(void *crc_calculator, uint8_t *p, int length) { - CRC_CALCULATOR *p_crc_calculator = (CRC_CALCULATOR *)crc_calculator; - crc_calculator_reset(p_crc_calculator); - crc_calculator_process_data(p_crc_calculator, p, length); - return crc_calculator_get_crc(p_crc_calculator); -} - -/* CRC-32C (iSCSI) polynomial in reversed bit order. 
*/ -#define POLY 0x82f63b78 - -/* Construct table for software CRC-32C calculation. */ -void av1_crc32c_calculator_init(CRC32C *p_crc32c) { - uint32_t crc; - - for (int n = 0; n < 256; n++) { - crc = n; - crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1; - crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1; - crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1; - crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1; - crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1; - crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1; - crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1; - crc = crc & 1 ? (crc >> 1) ^ POLY : crc >> 1; - p_crc32c->table[0][n] = crc; - } - for (int n = 0; n < 256; n++) { - crc = p_crc32c->table[0][n]; - for (int k = 1; k < 8; k++) { - crc = p_crc32c->table[0][crc & 0xff] ^ (crc >> 8); - p_crc32c->table[k][n] = crc; - } - } -} - -/* Table-driven software version as a fall-back. This is about 15 times slower - than using the hardware instructions. This assumes little-endian integers, - as is the case on Intel processors that the assembler code here is for. 
*/ -uint32_t av1_get_crc32c_value_c(CRC32C *p, uint8_t *buf, size_t len) { - const uint8_t *next = (const uint8_t *)(buf); - uint64_t crc; - - crc = 0 ^ 0xffffffff; - while (len && ((uintptr_t)next & 7) != 0) { - crc = p->table[0][(crc ^ *next++) & 0xff] ^ (crc >> 8); - len--; - } - while (len >= 8) { - crc ^= *(uint64_t *)next; - crc = p->table[7][crc & 0xff] ^ p->table[6][(crc >> 8) & 0xff] ^ - p->table[5][(crc >> 16) & 0xff] ^ p->table[4][(crc >> 24) & 0xff] ^ - p->table[3][(crc >> 32) & 0xff] ^ p->table[2][(crc >> 40) & 0xff] ^ - p->table[1][(crc >> 48) & 0xff] ^ p->table[0][crc >> 56]; - next += 8; - len -= 8; - } - while (len) { - crc = p->table[0][(crc ^ *next++) & 0xff] ^ (crc >> 8); - len--; - } - return (uint32_t)crc ^ 0xffffffff; -} diff --git a/third_party/aom/av1/encoder/hash.h b/third_party/aom/av1/encoder/hash.h deleted file mode 100644 index 826c004d6..000000000 --- a/third_party/aom/av1/encoder/hash.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_ENCODER_HASH_H_ -#define AOM_AV1_ENCODER_HASH_H_ - -#include "config/aom_config.h" - -#include "aom/aom_integer.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct _crc_calculator { - uint32_t remainder; - uint32_t trunc_poly; - uint32_t bits; - uint32_t table[256]; - uint32_t final_result_mask; -} CRC_CALCULATOR; - -// Initialize the crc calculator. It must be executed at least once before -// calling av1_get_crc_value(). 
-void av1_crc_calculator_init(CRC_CALCULATOR *p_crc_calculator, uint32_t bits, - uint32_t truncPoly); -uint32_t av1_get_crc_value(void *crc_calculator, uint8_t *p, int length); - -// CRC32C: POLY = 0x82f63b78; -typedef struct _CRC32C { - /* Table for a quadword-at-a-time software crc. */ - uint32_t table[8][256]; -} CRC32C; - -// init table for software version crc32c -void av1_crc32c_calculator_init(CRC32C *p_crc32c); - -#define AOM_BUFFER_SIZE_FOR_BLOCK_HASH (4096) - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_HASH_H_ diff --git a/third_party/aom/av1/encoder/hash_motion.c b/third_party/aom/av1/encoder/hash_motion.c deleted file mode 100644 index e85a516e8..000000000 --- a/third_party/aom/av1/encoder/hash_motion.c +++ /dev/null @@ -1,482 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include - -#include "config/av1_rtcd.h" - -#include "av1/encoder/block.h" -#include "av1/encoder/hash.h" -#include "av1/encoder/hash_motion.h" - -static const int crc_bits = 16; -static const int block_size_bits = 3; - -static void hash_table_clear_all(hash_table *p_hash_table) { - if (p_hash_table->p_lookup_table == NULL) { - return; - } - int max_addr = 1 << (crc_bits + block_size_bits); - for (int i = 0; i < max_addr; i++) { - if (p_hash_table->p_lookup_table[i] != NULL) { - aom_vector_destroy(p_hash_table->p_lookup_table[i]); - aom_free(p_hash_table->p_lookup_table[i]); - p_hash_table->p_lookup_table[i] = NULL; - } - } -} - -// TODO(youzhou@microsoft.com): is higher than 8 bits screen content supported? -// If yes, fix this function -static void get_pixels_in_1D_char_array_by_block_2x2(uint8_t *y_src, int stride, - uint8_t *p_pixels_in1D) { - uint8_t *p_pel = y_src; - int index = 0; - for (int i = 0; i < 2; i++) { - for (int j = 0; j < 2; j++) { - p_pixels_in1D[index++] = p_pel[j]; - } - p_pel += stride; - } -} - -static void get_pixels_in_1D_short_array_by_block_2x2(uint16_t *y_src, - int stride, - uint16_t *p_pixels_in1D) { - uint16_t *p_pel = y_src; - int index = 0; - for (int i = 0; i < 2; i++) { - for (int j = 0; j < 2; j++) { - p_pixels_in1D[index++] = p_pel[j]; - } - p_pel += stride; - } -} - -static int is_block_2x2_row_same_value(uint8_t *p) { - if (p[0] != p[1] || p[2] != p[3]) { - return 0; - } - return 1; -} - -static int is_block16_2x2_row_same_value(uint16_t *p) { - if (p[0] != p[1] || p[2] != p[3]) { - return 0; - } - return 1; -} - -static int is_block_2x2_col_same_value(uint8_t *p) { - if ((p[0] != p[2]) || (p[1] != p[3])) { - return 0; - } - return 1; -} - -static int is_block16_2x2_col_same_value(uint16_t *p) { - if ((p[0] != p[2]) || (p[1] != p[3])) { - return 0; - } - return 1; -} - -// the hash value (hash_value1 consists two parts, the first 3 bits relate to -// the block size and the remaining 16 bits are the crc values. 
This fuction -// is used to get the first 3 bits. -static int hash_block_size_to_index(int block_size) { - switch (block_size) { - case 4: return 0; - case 8: return 1; - case 16: return 2; - case 32: return 3; - case 64: return 4; - case 128: return 5; - default: return -1; - } -} - -void av1_hash_table_init(hash_table *p_hash_table, MACROBLOCK *x) { - if (x->g_crc_initialized == 0) { - av1_crc_calculator_init(&x->crc_calculator1, 24, 0x5D6DCB); - av1_crc_calculator_init(&x->crc_calculator2, 24, 0x864CFB); - x->g_crc_initialized = 1; - } - p_hash_table->p_lookup_table = NULL; -} - -void av1_hash_table_destroy(hash_table *p_hash_table) { - hash_table_clear_all(p_hash_table); - aom_free(p_hash_table->p_lookup_table); - p_hash_table->p_lookup_table = NULL; -} - -void av1_hash_table_create(hash_table *p_hash_table) { - if (p_hash_table->p_lookup_table != NULL) { - hash_table_clear_all(p_hash_table); - return; - } - const int max_addr = 1 << (crc_bits + block_size_bits); - p_hash_table->p_lookup_table = - (Vector **)aom_malloc(sizeof(p_hash_table->p_lookup_table[0]) * max_addr); - memset(p_hash_table->p_lookup_table, 0, - sizeof(p_hash_table->p_lookup_table[0]) * max_addr); -} - -static void hash_table_add_to_table(hash_table *p_hash_table, - uint32_t hash_value, - block_hash *curr_block_hash) { - if (p_hash_table->p_lookup_table[hash_value] == NULL) { - p_hash_table->p_lookup_table[hash_value] = - aom_malloc(sizeof(p_hash_table->p_lookup_table[0][0])); - aom_vector_setup(p_hash_table->p_lookup_table[hash_value], 10, - sizeof(curr_block_hash[0])); - aom_vector_push_back(p_hash_table->p_lookup_table[hash_value], - curr_block_hash); - } else { - aom_vector_push_back(p_hash_table->p_lookup_table[hash_value], - curr_block_hash); - } -} - -int32_t av1_hash_table_count(hash_table *p_hash_table, uint32_t hash_value) { - if (p_hash_table->p_lookup_table[hash_value] == NULL) { - return 0; - } else { - return (int32_t)(p_hash_table->p_lookup_table[hash_value]->size); - } -} - 
-Iterator av1_hash_get_first_iterator(hash_table *p_hash_table, - uint32_t hash_value) { - assert(av1_hash_table_count(p_hash_table, hash_value) > 0); - return aom_vector_begin(p_hash_table->p_lookup_table[hash_value]); -} - -int32_t av1_has_exact_match(hash_table *p_hash_table, uint32_t hash_value1, - uint32_t hash_value2) { - if (p_hash_table->p_lookup_table[hash_value1] == NULL) { - return 0; - } - Iterator iterator = - aom_vector_begin(p_hash_table->p_lookup_table[hash_value1]); - Iterator last = aom_vector_end(p_hash_table->p_lookup_table[hash_value1]); - for (; !iterator_equals(&iterator, &last); iterator_increment(&iterator)) { - if ((*(block_hash *)iterator_get(&iterator)).hash_value2 == hash_value2) { - return 1; - } - } - return 0; -} - -void av1_generate_block_2x2_hash_value(const YV12_BUFFER_CONFIG *picture, - uint32_t *pic_block_hash[2], - int8_t *pic_block_same_info[3], - MACROBLOCK *x) { - const int width = 2; - const int height = 2; - const int x_end = picture->y_crop_width - width + 1; - const int y_end = picture->y_crop_height - height + 1; - - const int length = width * 2; - if (picture->flags & YV12_FLAG_HIGHBITDEPTH) { - uint16_t p[4]; - int pos = 0; - for (int y_pos = 0; y_pos < y_end; y_pos++) { - for (int x_pos = 0; x_pos < x_end; x_pos++) { - get_pixels_in_1D_short_array_by_block_2x2( - CONVERT_TO_SHORTPTR(picture->y_buffer) + y_pos * picture->y_stride + - x_pos, - picture->y_stride, p); - pic_block_same_info[0][pos] = is_block16_2x2_row_same_value(p); - pic_block_same_info[1][pos] = is_block16_2x2_col_same_value(p); - - pic_block_hash[0][pos] = av1_get_crc_value( - &x->crc_calculator1, (uint8_t *)p, length * sizeof(p[0])); - pic_block_hash[1][pos] = av1_get_crc_value( - &x->crc_calculator2, (uint8_t *)p, length * sizeof(p[0])); - pos++; - } - pos += width - 1; - } - } else { - uint8_t p[4]; - int pos = 0; - for (int y_pos = 0; y_pos < y_end; y_pos++) { - for (int x_pos = 0; x_pos < x_end; x_pos++) { - 
get_pixels_in_1D_char_array_by_block_2x2( - picture->y_buffer + y_pos * picture->y_stride + x_pos, - picture->y_stride, p); - pic_block_same_info[0][pos] = is_block_2x2_row_same_value(p); - pic_block_same_info[1][pos] = is_block_2x2_col_same_value(p); - - pic_block_hash[0][pos] = - av1_get_crc_value(&x->crc_calculator1, p, length * sizeof(p[0])); - pic_block_hash[1][pos] = - av1_get_crc_value(&x->crc_calculator2, p, length * sizeof(p[0])); - pos++; - } - pos += width - 1; - } - } -} - -void av1_generate_block_hash_value(const YV12_BUFFER_CONFIG *picture, - int block_size, - uint32_t *src_pic_block_hash[2], - uint32_t *dst_pic_block_hash[2], - int8_t *src_pic_block_same_info[3], - int8_t *dst_pic_block_same_info[3], - MACROBLOCK *x) { - const int pic_width = picture->y_crop_width; - const int x_end = picture->y_crop_width - block_size + 1; - const int y_end = picture->y_crop_height - block_size + 1; - - const int src_size = block_size >> 1; - const int quad_size = block_size >> 2; - - uint32_t p[4]; - const int length = sizeof(p); - - int pos = 0; - for (int y_pos = 0; y_pos < y_end; y_pos++) { - for (int x_pos = 0; x_pos < x_end; x_pos++) { - p[0] = src_pic_block_hash[0][pos]; - p[1] = src_pic_block_hash[0][pos + src_size]; - p[2] = src_pic_block_hash[0][pos + src_size * pic_width]; - p[3] = src_pic_block_hash[0][pos + src_size * pic_width + src_size]; - dst_pic_block_hash[0][pos] = - av1_get_crc_value(&x->crc_calculator1, (uint8_t *)p, length); - - p[0] = src_pic_block_hash[1][pos]; - p[1] = src_pic_block_hash[1][pos + src_size]; - p[2] = src_pic_block_hash[1][pos + src_size * pic_width]; - p[3] = src_pic_block_hash[1][pos + src_size * pic_width + src_size]; - dst_pic_block_hash[1][pos] = - av1_get_crc_value(&x->crc_calculator2, (uint8_t *)p, length); - - dst_pic_block_same_info[0][pos] = - src_pic_block_same_info[0][pos] && - src_pic_block_same_info[0][pos + quad_size] && - src_pic_block_same_info[0][pos + src_size] && - src_pic_block_same_info[0][pos + src_size 
* pic_width] && - src_pic_block_same_info[0][pos + src_size * pic_width + quad_size] && - src_pic_block_same_info[0][pos + src_size * pic_width + src_size]; - - dst_pic_block_same_info[1][pos] = - src_pic_block_same_info[1][pos] && - src_pic_block_same_info[1][pos + src_size] && - src_pic_block_same_info[1][pos + quad_size * pic_width] && - src_pic_block_same_info[1][pos + quad_size * pic_width + src_size] && - src_pic_block_same_info[1][pos + src_size * pic_width] && - src_pic_block_same_info[1][pos + src_size * pic_width + src_size]; - pos++; - } - pos += block_size - 1; - } - - if (block_size >= 4) { - const int size_minus_1 = block_size - 1; - pos = 0; - for (int y_pos = 0; y_pos < y_end; y_pos++) { - for (int x_pos = 0; x_pos < x_end; x_pos++) { - dst_pic_block_same_info[2][pos] = - (!dst_pic_block_same_info[0][pos] && - !dst_pic_block_same_info[1][pos]) || - (((x_pos & size_minus_1) == 0) && ((y_pos & size_minus_1) == 0)); - pos++; - } - pos += block_size - 1; - } - } -} - -void av1_add_to_hash_map_by_row_with_precal_data(hash_table *p_hash_table, - uint32_t *pic_hash[2], - int8_t *pic_is_same, - int pic_width, int pic_height, - int block_size) { - const int x_end = pic_width - block_size + 1; - const int y_end = pic_height - block_size + 1; - - const int8_t *src_is_added = pic_is_same; - const uint32_t *src_hash[2] = { pic_hash[0], pic_hash[1] }; - - int add_value = hash_block_size_to_index(block_size); - assert(add_value >= 0); - add_value <<= crc_bits; - const int crc_mask = (1 << crc_bits) - 1; - - for (int x_pos = 0; x_pos < x_end; x_pos++) { - for (int y_pos = 0; y_pos < y_end; y_pos++) { - const int pos = y_pos * pic_width + x_pos; - // valid data - if (src_is_added[pos]) { - block_hash curr_block_hash; - curr_block_hash.x = x_pos; - curr_block_hash.y = y_pos; - - const uint32_t hash_value1 = (src_hash[0][pos] & crc_mask) + add_value; - curr_block_hash.hash_value2 = src_hash[1][pos]; - - hash_table_add_to_table(p_hash_table, hash_value1, 
&curr_block_hash); - } - } - } -} - -int av1_hash_is_horizontal_perfect(const YV12_BUFFER_CONFIG *picture, - int block_size, int x_start, int y_start) { - const int stride = picture->y_stride; - const uint8_t *p = picture->y_buffer + y_start * stride + x_start; - - if (picture->flags & YV12_FLAG_HIGHBITDEPTH) { - const uint16_t *p16 = CONVERT_TO_SHORTPTR(p); - for (int i = 0; i < block_size; i++) { - for (int j = 1; j < block_size; j++) { - if (p16[j] != p16[0]) { - return 0; - } - } - p16 += stride; - } - } else { - for (int i = 0; i < block_size; i++) { - for (int j = 1; j < block_size; j++) { - if (p[j] != p[0]) { - return 0; - } - } - p += stride; - } - } - - return 1; -} - -int av1_hash_is_vertical_perfect(const YV12_BUFFER_CONFIG *picture, - int block_size, int x_start, int y_start) { - const int stride = picture->y_stride; - const uint8_t *p = picture->y_buffer + y_start * stride + x_start; - - if (picture->flags & YV12_FLAG_HIGHBITDEPTH) { - const uint16_t *p16 = CONVERT_TO_SHORTPTR(p); - for (int i = 0; i < block_size; i++) { - for (int j = 1; j < block_size; j++) { - if (p16[j * stride + i] != p16[i]) { - return 0; - } - } - } - } else { - for (int i = 0; i < block_size; i++) { - for (int j = 1; j < block_size; j++) { - if (p[j * stride + i] != p[i]) { - return 0; - } - } - } - } - return 1; -} - -void av1_get_block_hash_value(uint8_t *y_src, int stride, int block_size, - uint32_t *hash_value1, uint32_t *hash_value2, - int use_highbitdepth, MACROBLOCK *x) { - uint32_t to_hash[4]; - const int add_value = hash_block_size_to_index(block_size) << crc_bits; - assert(add_value >= 0); - const int crc_mask = (1 << crc_bits) - 1; - - // 2x2 subblock hash values in current CU - int sub_block_in_width = (block_size >> 1); - if (use_highbitdepth) { - uint16_t pixel_to_hash[4]; - uint16_t *y16_src = CONVERT_TO_SHORTPTR(y_src); - for (int y_pos = 0; y_pos < block_size; y_pos += 2) { - for (int x_pos = 0; x_pos < block_size; x_pos += 2) { - int pos = (y_pos >> 1) * 
sub_block_in_width + (x_pos >> 1); - get_pixels_in_1D_short_array_by_block_2x2( - y16_src + y_pos * stride + x_pos, stride, pixel_to_hash); - assert(pos < AOM_BUFFER_SIZE_FOR_BLOCK_HASH); - x->hash_value_buffer[0][0][pos] = - av1_get_crc_value(&x->crc_calculator1, (uint8_t *)pixel_to_hash, - sizeof(pixel_to_hash)); - x->hash_value_buffer[1][0][pos] = - av1_get_crc_value(&x->crc_calculator2, (uint8_t *)pixel_to_hash, - sizeof(pixel_to_hash)); - } - } - } else { - uint8_t pixel_to_hash[4]; - for (int y_pos = 0; y_pos < block_size; y_pos += 2) { - for (int x_pos = 0; x_pos < block_size; x_pos += 2) { - int pos = (y_pos >> 1) * sub_block_in_width + (x_pos >> 1); - get_pixels_in_1D_char_array_by_block_2x2(y_src + y_pos * stride + x_pos, - stride, pixel_to_hash); - assert(pos < AOM_BUFFER_SIZE_FOR_BLOCK_HASH); - x->hash_value_buffer[0][0][pos] = av1_get_crc_value( - &x->crc_calculator1, pixel_to_hash, sizeof(pixel_to_hash)); - x->hash_value_buffer[1][0][pos] = av1_get_crc_value( - &x->crc_calculator2, pixel_to_hash, sizeof(pixel_to_hash)); - } - } - } - - int src_sub_block_in_width = sub_block_in_width; - sub_block_in_width >>= 1; - - int src_idx = 1; - int dst_idx = 0; - - // 4x4 subblock hash values to current block hash values - for (int sub_width = 4; sub_width <= block_size; sub_width *= 2) { - src_idx = 1 - src_idx; - dst_idx = 1 - dst_idx; - - int dst_pos = 0; - for (int y_pos = 0; y_pos < sub_block_in_width; y_pos++) { - for (int x_pos = 0; x_pos < sub_block_in_width; x_pos++) { - int srcPos = (y_pos << 1) * src_sub_block_in_width + (x_pos << 1); - - assert(srcPos + 1 < AOM_BUFFER_SIZE_FOR_BLOCK_HASH); - assert(srcPos + src_sub_block_in_width + 1 < - AOM_BUFFER_SIZE_FOR_BLOCK_HASH); - assert(dst_pos < AOM_BUFFER_SIZE_FOR_BLOCK_HASH); - to_hash[0] = x->hash_value_buffer[0][src_idx][srcPos]; - to_hash[1] = x->hash_value_buffer[0][src_idx][srcPos + 1]; - to_hash[2] = - x->hash_value_buffer[0][src_idx][srcPos + src_sub_block_in_width]; - to_hash[3] = 
x->hash_value_buffer[0][src_idx] - [srcPos + src_sub_block_in_width + 1]; - - x->hash_value_buffer[0][dst_idx][dst_pos] = av1_get_crc_value( - &x->crc_calculator1, (uint8_t *)to_hash, sizeof(to_hash)); - - to_hash[0] = x->hash_value_buffer[1][src_idx][srcPos]; - to_hash[1] = x->hash_value_buffer[1][src_idx][srcPos + 1]; - to_hash[2] = - x->hash_value_buffer[1][src_idx][srcPos + src_sub_block_in_width]; - to_hash[3] = x->hash_value_buffer[1][src_idx] - [srcPos + src_sub_block_in_width + 1]; - x->hash_value_buffer[1][dst_idx][dst_pos] = av1_get_crc_value( - &x->crc_calculator2, (uint8_t *)to_hash, sizeof(to_hash)); - dst_pos++; - } - } - - src_sub_block_in_width = sub_block_in_width; - sub_block_in_width >>= 1; - } - - *hash_value1 = (x->hash_value_buffer[0][dst_idx][0] & crc_mask) + add_value; - *hash_value2 = x->hash_value_buffer[1][dst_idx][0]; -} diff --git a/third_party/aom/av1/encoder/hash_motion.h b/third_party/aom/av1/encoder/hash_motion.h deleted file mode 100644 index df3ec3215..000000000 --- a/third_party/aom/av1/encoder/hash_motion.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_ENCODER_HASH_MOTION_H_ -#define AOM_AV1_ENCODER_HASH_MOTION_H_ - -#include "config/aom_config.h" - -#include "aom/aom_integer.h" -#include "aom_scale/yv12config.h" -#include "third_party/vector/vector.h" -#ifdef __cplusplus -extern "C" { -#endif - -// store a block's hash info. 
-// x and y are the position from the top left of the picture -// hash_value2 is used to store the second hash value -typedef struct _block_hash { - int16_t x; - int16_t y; - uint32_t hash_value2; -} block_hash; - -typedef struct _hash_table { - Vector **p_lookup_table; -} hash_table; - -void av1_hash_table_init(hash_table *p_hash_table, struct macroblock *x); -void av1_hash_table_destroy(hash_table *p_hash_table); -void av1_hash_table_create(hash_table *p_hash_table); -int32_t av1_hash_table_count(hash_table *p_hash_table, uint32_t hash_value); -Iterator av1_hash_get_first_iterator(hash_table *p_hash_table, - uint32_t hash_value); -int32_t av1_has_exact_match(hash_table *p_hash_table, uint32_t hash_value1, - uint32_t hash_value2); -void av1_generate_block_2x2_hash_value(const YV12_BUFFER_CONFIG *picture, - uint32_t *pic_block_hash[2], - int8_t *pic_block_same_info[3], - struct macroblock *x); -void av1_generate_block_hash_value(const YV12_BUFFER_CONFIG *picture, - int block_size, - uint32_t *src_pic_block_hash[2], - uint32_t *dst_pic_block_hash[2], - int8_t *src_pic_block_same_info[3], - int8_t *dst_pic_block_same_info[3], - struct macroblock *x); -void av1_add_to_hash_map_by_row_with_precal_data(hash_table *p_hash_table, - uint32_t *pic_hash[2], - int8_t *pic_is_same, - int pic_width, int pic_height, - int block_size); - -// check whether the block starts from (x_start, y_start) with the size of -// block_size x block_size has the same color in all rows -int av1_hash_is_horizontal_perfect(const YV12_BUFFER_CONFIG *picture, - int block_size, int x_start, int y_start); -// check whether the block starts from (x_start, y_start) with the size of -// block_size x block_size has the same color in all columns -int av1_hash_is_vertical_perfect(const YV12_BUFFER_CONFIG *picture, - int block_size, int x_start, int y_start); -void av1_get_block_hash_value(uint8_t *y_src, int stride, int block_size, - uint32_t *hash_value1, uint32_t *hash_value2, - int use_highbitdepth, 
struct macroblock *x); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_HASH_MOTION_H_ diff --git a/third_party/aom/av1/encoder/hybrid_fwd_txfm.c b/third_party/aom/av1/encoder/hybrid_fwd_txfm.c deleted file mode 100644 index 67898fd18..000000000 --- a/third_party/aom/av1/encoder/hybrid_fwd_txfm.c +++ /dev/null @@ -1,390 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include "config/aom_config.h" -#include "config/av1_rtcd.h" -#include "config/aom_dsp_rtcd.h" - -#include "av1/common/idct.h" -#include "av1/encoder/hybrid_fwd_txfm.h" - -/* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per - pixel. 
*/ -void av1_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) { - int i; - tran_high_t a1, b1, c1, d1, e1; - const int16_t *ip_pass0 = input; - const tran_low_t *ip = NULL; - tran_low_t *op = output; - - for (i = 0; i < 4; i++) { - a1 = ip_pass0[0 * stride]; - b1 = ip_pass0[1 * stride]; - c1 = ip_pass0[2 * stride]; - d1 = ip_pass0[3 * stride]; - - a1 += b1; - d1 = d1 - c1; - e1 = (a1 - d1) >> 1; - b1 = e1 - b1; - c1 = e1 - c1; - a1 -= c1; - d1 += b1; - op[0] = (tran_low_t)a1; - op[4] = (tran_low_t)c1; - op[8] = (tran_low_t)d1; - op[12] = (tran_low_t)b1; - - ip_pass0++; - op++; - } - ip = output; - op = output; - - for (i = 0; i < 4; i++) { - a1 = ip[0]; - b1 = ip[1]; - c1 = ip[2]; - d1 = ip[3]; - - a1 += b1; - d1 -= c1; - e1 = (a1 - d1) >> 1; - b1 = e1 - b1; - c1 = e1 - c1; - a1 -= c1; - d1 += b1; - op[0] = (tran_low_t)(a1 * UNIT_QUANT_FACTOR); - op[1] = (tran_low_t)(c1 * UNIT_QUANT_FACTOR); - op[2] = (tran_low_t)(d1 * UNIT_QUANT_FACTOR); - op[3] = (tran_low_t)(b1 * UNIT_QUANT_FACTOR); - - ip += 4; - op += 4; - } -} - -void av1_highbd_fwht4x4_c(const int16_t *input, tran_low_t *output, - int stride) { - av1_fwht4x4_c(input, output, stride); -} - -static void highbd_fwd_txfm_4x4(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TxfmParam *txfm_param) { - int32_t *dst_coeff = (int32_t *)coeff; - const TX_TYPE tx_type = txfm_param->tx_type; - const int bd = txfm_param->bd; - if (txfm_param->lossless) { - assert(tx_type == DCT_DCT); - av1_highbd_fwht4x4(src_diff, coeff, diff_stride); - return; - } - switch (tx_type) { - // use the c version for anything including identity for now - case V_DCT: - case H_DCT: - case V_ADST: - case H_ADST: - case V_FLIPADST: - case H_FLIPADST: - case IDTX: - av1_fwd_txfm2d_4x4_c(src_diff, dst_coeff, diff_stride, tx_type, bd); - break; - default: - av1_fwd_txfm2d_4x4(src_diff, dst_coeff, diff_stride, tx_type, bd); - break; - } -} - -static void highbd_fwd_txfm_4x8(const int16_t *src_diff, tran_low_t *coeff, - 
int diff_stride, TxfmParam *txfm_param) { - int32_t *dst_coeff = (int32_t *)coeff; - av1_fwd_txfm2d_4x8_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type, - txfm_param->bd); -} - -static void highbd_fwd_txfm_8x4(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TxfmParam *txfm_param) { - int32_t *dst_coeff = (int32_t *)coeff; - av1_fwd_txfm2d_8x4_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type, - txfm_param->bd); -} - -static void highbd_fwd_txfm_8x16(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TxfmParam *txfm_param) { - int32_t *dst_coeff = (int32_t *)coeff; - const TX_TYPE tx_type = txfm_param->tx_type; - const int bd = txfm_param->bd; - switch (tx_type) { - // use the c version for anything including identity for now - case V_DCT: - case H_DCT: - case V_ADST: - case H_ADST: - case V_FLIPADST: - case H_FLIPADST: - case IDTX: - av1_fwd_txfm2d_8x16_c(src_diff, dst_coeff, diff_stride, tx_type, bd); - break; - default: - av1_fwd_txfm2d_8x16(src_diff, dst_coeff, diff_stride, tx_type, bd); - break; - } -} - -static void highbd_fwd_txfm_16x8(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TxfmParam *txfm_param) { - int32_t *dst_coeff = (int32_t *)coeff; - const TX_TYPE tx_type = txfm_param->tx_type; - const int bd = txfm_param->bd; - switch (tx_type) { - // use the c version for anything including identity for now - case V_DCT: - case H_DCT: - case V_ADST: - case H_ADST: - case V_FLIPADST: - case H_FLIPADST: - case IDTX: - av1_fwd_txfm2d_16x8_c(src_diff, dst_coeff, diff_stride, tx_type, bd); - break; - default: - av1_fwd_txfm2d_16x8(src_diff, dst_coeff, diff_stride, tx_type, bd); - break; - } -} - -static void highbd_fwd_txfm_16x32(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TxfmParam *txfm_param) { - int32_t *dst_coeff = (int32_t *)coeff; - av1_fwd_txfm2d_16x32_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type, - txfm_param->bd); -} - -static void highbd_fwd_txfm_32x16(const int16_t 
*src_diff, tran_low_t *coeff, - int diff_stride, TxfmParam *txfm_param) { - int32_t *dst_coeff = (int32_t *)coeff; - av1_fwd_txfm2d_32x16_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type, - txfm_param->bd); -} - -static void highbd_fwd_txfm_16x4(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TxfmParam *txfm_param) { - int32_t *dst_coeff = (int32_t *)coeff; - av1_fwd_txfm2d_16x4_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type, - txfm_param->bd); -} - -static void highbd_fwd_txfm_4x16(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TxfmParam *txfm_param) { - int32_t *dst_coeff = (int32_t *)coeff; - av1_fwd_txfm2d_4x16_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type, - txfm_param->bd); -} - -static void highbd_fwd_txfm_32x8(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TxfmParam *txfm_param) { - int32_t *dst_coeff = (int32_t *)coeff; - av1_fwd_txfm2d_32x8_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type, - txfm_param->bd); -} - -static void highbd_fwd_txfm_8x32(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TxfmParam *txfm_param) { - int32_t *dst_coeff = (int32_t *)coeff; - av1_fwd_txfm2d_8x32_c(src_diff, dst_coeff, diff_stride, txfm_param->tx_type, - txfm_param->bd); -} - -static void highbd_fwd_txfm_8x8(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TxfmParam *txfm_param) { - int32_t *dst_coeff = (int32_t *)coeff; - const TX_TYPE tx_type = txfm_param->tx_type; - const int bd = txfm_param->bd; - switch (tx_type) { - // use the c version for anything including identity for now - case V_DCT: - case H_DCT: - case V_ADST: - case H_ADST: - case V_FLIPADST: - case H_FLIPADST: - case IDTX: - av1_fwd_txfm2d_8x8_c(src_diff, dst_coeff, diff_stride, tx_type, bd); - break; - default: - av1_fwd_txfm2d_8x8(src_diff, dst_coeff, diff_stride, tx_type, bd); - break; - } -} - -static void highbd_fwd_txfm_16x16(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, 
TxfmParam *txfm_param) { - int32_t *dst_coeff = (int32_t *)coeff; - const TX_TYPE tx_type = txfm_param->tx_type; - const int bd = txfm_param->bd; - switch (tx_type) { - // use the c version for anything including identity for now - case V_DCT: - case H_DCT: - case V_ADST: - case H_ADST: - case V_FLIPADST: - case H_FLIPADST: - case IDTX: - av1_fwd_txfm2d_16x16_c(src_diff, dst_coeff, diff_stride, tx_type, bd); - break; - default: - av1_fwd_txfm2d_16x16(src_diff, dst_coeff, diff_stride, tx_type, bd); - break; - } -} - -static void highbd_fwd_txfm_32x32(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TxfmParam *txfm_param) { - int32_t *dst_coeff = (int32_t *)coeff; - const TX_TYPE tx_type = txfm_param->tx_type; - const int bd = txfm_param->bd; - switch (tx_type) { - // use the c version for anything including identity for now - case V_DCT: - case H_DCT: - case V_ADST: - case H_ADST: - case V_FLIPADST: - case H_FLIPADST: - case IDTX: - av1_fwd_txfm2d_32x32_c(src_diff, dst_coeff, diff_stride, tx_type, bd); - break; - default: - av1_fwd_txfm2d_32x32(src_diff, dst_coeff, diff_stride, tx_type, bd); - break; - } -} - -static void highbd_fwd_txfm_32x64(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TxfmParam *txfm_param) { - assert(txfm_param->tx_type == DCT_DCT); - int32_t *dst_coeff = (int32_t *)coeff; - const int bd = txfm_param->bd; - av1_fwd_txfm2d_32x64_c(src_diff, dst_coeff, diff_stride, DCT_DCT, bd); -} - -static void highbd_fwd_txfm_64x32(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TxfmParam *txfm_param) { - assert(txfm_param->tx_type == DCT_DCT); - int32_t *dst_coeff = (int32_t *)coeff; - const int bd = txfm_param->bd; - av1_fwd_txfm2d_64x32_c(src_diff, dst_coeff, diff_stride, DCT_DCT, bd); -} - -static void highbd_fwd_txfm_16x64(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TxfmParam *txfm_param) { - assert(txfm_param->tx_type == DCT_DCT); - int32_t *dst_coeff = (int32_t *)coeff; - const int bd = 
txfm_param->bd; - av1_fwd_txfm2d_16x64_c(src_diff, dst_coeff, diff_stride, DCT_DCT, bd); -} - -static void highbd_fwd_txfm_64x16(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TxfmParam *txfm_param) { - assert(txfm_param->tx_type == DCT_DCT); - int32_t *dst_coeff = (int32_t *)coeff; - const int bd = txfm_param->bd; - av1_fwd_txfm2d_64x16_c(src_diff, dst_coeff, diff_stride, DCT_DCT, bd); -} - -static void highbd_fwd_txfm_64x64(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TxfmParam *txfm_param) { - assert(txfm_param->tx_type == DCT_DCT); - int32_t *dst_coeff = (int32_t *)coeff; - const int bd = txfm_param->bd; - av1_fwd_txfm2d_64x64(src_diff, dst_coeff, diff_stride, DCT_DCT, bd); -} - -void av1_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, - TxfmParam *txfm_param) { - if (txfm_param->bd == 8) - av1_lowbd_fwd_txfm(src_diff, coeff, diff_stride, txfm_param); - else - av1_highbd_fwd_txfm(src_diff, coeff, diff_stride, txfm_param); -} - -void av1_lowbd_fwd_txfm_c(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TxfmParam *txfm_param) { - av1_highbd_fwd_txfm(src_diff, coeff, diff_stride, txfm_param); -} - -void av1_highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TxfmParam *txfm_param) { - assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]); - const TX_SIZE tx_size = txfm_param->tx_size; - switch (tx_size) { - case TX_64X64: - highbd_fwd_txfm_64x64(src_diff, coeff, diff_stride, txfm_param); - break; - case TX_32X64: - highbd_fwd_txfm_32x64(src_diff, coeff, diff_stride, txfm_param); - break; - case TX_64X32: - highbd_fwd_txfm_64x32(src_diff, coeff, diff_stride, txfm_param); - break; - case TX_16X64: - highbd_fwd_txfm_16x64(src_diff, coeff, diff_stride, txfm_param); - break; - case TX_64X16: - highbd_fwd_txfm_64x16(src_diff, coeff, diff_stride, txfm_param); - break; - case TX_32X32: - highbd_fwd_txfm_32x32(src_diff, coeff, diff_stride, txfm_param); - break; 
- case TX_16X16: - highbd_fwd_txfm_16x16(src_diff, coeff, diff_stride, txfm_param); - break; - case TX_8X8: - highbd_fwd_txfm_8x8(src_diff, coeff, diff_stride, txfm_param); - break; - case TX_4X8: - highbd_fwd_txfm_4x8(src_diff, coeff, diff_stride, txfm_param); - break; - case TX_8X4: - highbd_fwd_txfm_8x4(src_diff, coeff, diff_stride, txfm_param); - break; - case TX_8X16: - highbd_fwd_txfm_8x16(src_diff, coeff, diff_stride, txfm_param); - break; - case TX_16X8: - highbd_fwd_txfm_16x8(src_diff, coeff, diff_stride, txfm_param); - break; - case TX_16X32: - highbd_fwd_txfm_16x32(src_diff, coeff, diff_stride, txfm_param); - break; - case TX_32X16: - highbd_fwd_txfm_32x16(src_diff, coeff, diff_stride, txfm_param); - break; - case TX_4X4: - highbd_fwd_txfm_4x4(src_diff, coeff, diff_stride, txfm_param); - break; - case TX_4X16: - highbd_fwd_txfm_4x16(src_diff, coeff, diff_stride, txfm_param); - break; - case TX_16X4: - highbd_fwd_txfm_16x4(src_diff, coeff, diff_stride, txfm_param); - break; - case TX_8X32: - highbd_fwd_txfm_8x32(src_diff, coeff, diff_stride, txfm_param); - break; - case TX_32X8: - highbd_fwd_txfm_32x8(src_diff, coeff, diff_stride, txfm_param); - break; - default: assert(0); break; - } -} diff --git a/third_party/aom/av1/encoder/hybrid_fwd_txfm.h b/third_party/aom/av1/encoder/hybrid_fwd_txfm.h deleted file mode 100644 index daabc7119..000000000 --- a/third_party/aom/av1/encoder/hybrid_fwd_txfm.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_ENCODER_HYBRID_FWD_TXFM_H_ -#define AOM_AV1_ENCODER_HYBRID_FWD_TXFM_H_ - -#include "config/aom_config.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void av1_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride, - TxfmParam *txfm_param); - -void av1_highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TxfmParam *txfm_param); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_HYBRID_FWD_TXFM_H_ diff --git a/third_party/aom/av1/encoder/k_means_template.h b/third_party/aom/av1/encoder/k_means_template.h deleted file mode 100644 index 9e526b88b..000000000 --- a/third_party/aom/av1/encoder/k_means_template.h +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include -#include -#include - -#include "av1/encoder/palette.h" -#include "av1/encoder/random.h" - -#ifndef AV1_K_MEANS_DIM -#error "This template requires AV1_K_MEANS_DIM to be defined" -#endif - -#define RENAME_(x, y) AV1_K_MEANS_RENAME(x, y) -#define RENAME(x) RENAME_(x, AV1_K_MEANS_DIM) - -static int RENAME(calc_dist)(const int *p1, const int *p2) { - int dist = 0; - for (int i = 0; i < AV1_K_MEANS_DIM; ++i) { - const int diff = p1[i] - p2[i]; - dist += diff * diff; - } - return dist; -} - -void RENAME(av1_calc_indices)(const int *data, const int *centroids, - uint8_t *indices, int n, int k) { - for (int i = 0; i < n; ++i) { - int min_dist = RENAME(calc_dist)(data + i * AV1_K_MEANS_DIM, centroids); - indices[i] = 0; - for (int j = 1; j < k; ++j) { - const int this_dist = RENAME(calc_dist)(data + i * AV1_K_MEANS_DIM, - centroids + j * AV1_K_MEANS_DIM); - if (this_dist < min_dist) { - min_dist = this_dist; - indices[i] = j; - } - } - } -} - -static void RENAME(calc_centroids)(const int *data, int *centroids, - const uint8_t *indices, int n, int k) { - int i, j; - int count[PALETTE_MAX_SIZE] = { 0 }; - unsigned int rand_state = (unsigned int)data[0]; - assert(n <= 32768); - memset(centroids, 0, sizeof(centroids[0]) * k * AV1_K_MEANS_DIM); - - for (i = 0; i < n; ++i) { - const int index = indices[i]; - assert(index < k); - ++count[index]; - for (j = 0; j < AV1_K_MEANS_DIM; ++j) { - centroids[index * AV1_K_MEANS_DIM + j] += data[i * AV1_K_MEANS_DIM + j]; - } - } - - for (i = 0; i < k; ++i) { - if (count[i] == 0) { - memcpy(centroids + i * AV1_K_MEANS_DIM, - data + (lcg_rand16(&rand_state) % n) * AV1_K_MEANS_DIM, - sizeof(centroids[0]) * AV1_K_MEANS_DIM); - } else { - for (j = 0; j < AV1_K_MEANS_DIM; ++j) { - centroids[i * AV1_K_MEANS_DIM + j] = - DIVIDE_AND_ROUND(centroids[i * AV1_K_MEANS_DIM + j], count[i]); - } - } - } -} - -static int64_t RENAME(calc_total_dist)(const int *data, const int *centroids, - const uint8_t *indices, int n, int k) { - int64_t 
dist = 0; - (void)k; - for (int i = 0; i < n; ++i) { - dist += RENAME(calc_dist)(data + i * AV1_K_MEANS_DIM, - centroids + indices[i] * AV1_K_MEANS_DIM); - } - return dist; -} - -void RENAME(av1_k_means)(const int *data, int *centroids, uint8_t *indices, - int n, int k, int max_itr) { - int pre_centroids[2 * PALETTE_MAX_SIZE]; - uint8_t pre_indices[MAX_SB_SQUARE]; - - RENAME(av1_calc_indices)(data, centroids, indices, n, k); - int64_t this_dist = RENAME(calc_total_dist)(data, centroids, indices, n, k); - - for (int i = 0; i < max_itr; ++i) { - const int64_t pre_dist = this_dist; - memcpy(pre_centroids, centroids, - sizeof(pre_centroids[0]) * k * AV1_K_MEANS_DIM); - memcpy(pre_indices, indices, sizeof(pre_indices[0]) * n); - - RENAME(calc_centroids)(data, centroids, indices, n, k); - RENAME(av1_calc_indices)(data, centroids, indices, n, k); - this_dist = RENAME(calc_total_dist)(data, centroids, indices, n, k); - - if (this_dist > pre_dist) { - memcpy(centroids, pre_centroids, - sizeof(pre_centroids[0]) * k * AV1_K_MEANS_DIM); - memcpy(indices, pre_indices, sizeof(pre_indices[0]) * n); - break; - } - if (!memcmp(centroids, pre_centroids, - sizeof(pre_centroids[0]) * k * AV1_K_MEANS_DIM)) - break; - } -} -#undef RENAME_ -#undef RENAME diff --git a/third_party/aom/av1/encoder/lookahead.c b/third_party/aom/av1/encoder/lookahead.c deleted file mode 100644 index 1bf8ecbac..000000000 --- a/third_party/aom/av1/encoder/lookahead.c +++ /dev/null @@ -1,210 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ -#include -#include - -#include "config/aom_config.h" - -#include "av1/common/common.h" -#include "av1/encoder/encoder.h" -#include "av1/encoder/extend.h" -#include "av1/encoder/lookahead.h" - -/* Return the buffer at the given absolute index and increment the index */ -static struct lookahead_entry *pop(struct lookahead_ctx *ctx, int *idx) { - int index = *idx; - struct lookahead_entry *buf = ctx->buf + index; - - assert(index < ctx->max_sz); - if (++index >= ctx->max_sz) index -= ctx->max_sz; - *idx = index; - return buf; -} - -void av1_lookahead_destroy(struct lookahead_ctx *ctx) { - if (ctx) { - if (ctx->buf) { - int i; - - for (i = 0; i < ctx->max_sz; i++) aom_free_frame_buffer(&ctx->buf[i].img); - free(ctx->buf); - } - free(ctx); - } -} - -struct lookahead_ctx *av1_lookahead_init( - unsigned int width, unsigned int height, unsigned int subsampling_x, - unsigned int subsampling_y, int use_highbitdepth, unsigned int depth) { - struct lookahead_ctx *ctx = NULL; - - // Clamp the lookahead queue depth - depth = clamp(depth, 1, MAX_LAG_BUFFERS); - - // Allocate memory to keep previous source frames available. 
- depth += MAX_PRE_FRAMES; - - // Allocate the lookahead structures - ctx = calloc(1, sizeof(*ctx)); - if (ctx) { - const int legacy_byte_alignment = 0; - unsigned int i; - ctx->max_sz = depth; - ctx->buf = calloc(depth, sizeof(*ctx->buf)); - if (!ctx->buf) goto bail; - for (i = 0; i < depth; i++) - if (aom_alloc_frame_buffer(&ctx->buf[i].img, width, height, subsampling_x, - subsampling_y, use_highbitdepth, - AOM_BORDER_IN_PIXELS, legacy_byte_alignment)) - goto bail; - } - return ctx; -bail: - av1_lookahead_destroy(ctx); - return NULL; -} - -#define USE_PARTIAL_COPY 0 - -int av1_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src, - int64_t ts_start, int64_t ts_end, int use_highbitdepth, - aom_enc_frame_flags_t flags) { - struct lookahead_entry *buf; -#if USE_PARTIAL_COPY - int row, col, active_end; - int mb_rows = (src->y_height + 15) >> 4; - int mb_cols = (src->y_width + 15) >> 4; -#endif - int width = src->y_crop_width; - int height = src->y_crop_height; - int uv_width = src->uv_crop_width; - int uv_height = src->uv_crop_height; - int subsampling_x = src->subsampling_x; - int subsampling_y = src->subsampling_y; - int larger_dimensions, new_dimensions; - - if (ctx->sz + 1 + MAX_PRE_FRAMES > ctx->max_sz) return 1; - ctx->sz++; - buf = pop(ctx, &ctx->write_idx); - - new_dimensions = width != buf->img.y_crop_width || - height != buf->img.y_crop_height || - uv_width != buf->img.uv_crop_width || - uv_height != buf->img.uv_crop_height; - larger_dimensions = width > buf->img.y_width || height > buf->img.y_height || - uv_width > buf->img.uv_width || - uv_height > buf->img.uv_height; - assert(!larger_dimensions || new_dimensions); - -#if USE_PARTIAL_COPY - // TODO(jkoleszar): This is disabled for now, as - // av1_copy_and_extend_frame_with_rect is not subsampling/alpha aware. - - // Only do this partial copy if the following conditions are all met: - // 1. Lookahead queue has has size of 1. - // 2. Active map is provided. - // 3. 
This is not a key frame, golden nor altref frame. - if (!new_dimensions && ctx->max_sz == 1 && active_map && !flags) { - for (row = 0; row < mb_rows; ++row) { - col = 0; - - while (1) { - // Find the first active macroblock in this row. - for (; col < mb_cols; ++col) { - if (active_map[col]) break; - } - - // No more active macroblock in this row. - if (col == mb_cols) break; - - // Find the end of active region in this row. - active_end = col; - - for (; active_end < mb_cols; ++active_end) { - if (!active_map[active_end]) break; - } - - // Only copy this active region. - av1_copy_and_extend_frame_with_rect(src, &buf->img, row << 4, col << 4, - 16, (active_end - col) << 4); - - // Start again from the end of this active region. - col = active_end; - } - - active_map += mb_cols; - } - } else { -#endif - if (larger_dimensions) { - YV12_BUFFER_CONFIG new_img; - memset(&new_img, 0, sizeof(new_img)); - if (aom_alloc_frame_buffer(&new_img, width, height, subsampling_x, - subsampling_y, use_highbitdepth, - AOM_BORDER_IN_PIXELS, 0)) - return 1; - aom_free_frame_buffer(&buf->img); - buf->img = new_img; - } else if (new_dimensions) { - buf->img.y_crop_width = src->y_crop_width; - buf->img.y_crop_height = src->y_crop_height; - buf->img.uv_crop_width = src->uv_crop_width; - buf->img.uv_crop_height = src->uv_crop_height; - buf->img.subsampling_x = src->subsampling_x; - buf->img.subsampling_y = src->subsampling_y; - } - // Partial copy not implemented yet - av1_copy_and_extend_frame(src, &buf->img); -#if USE_PARTIAL_COPY - } -#endif - - buf->ts_start = ts_start; - buf->ts_end = ts_end; - buf->flags = flags; - return 0; -} - -struct lookahead_entry *av1_lookahead_pop(struct lookahead_ctx *ctx, - int drain) { - struct lookahead_entry *buf = NULL; - - if (ctx && ctx->sz && (drain || ctx->sz == ctx->max_sz - MAX_PRE_FRAMES)) { - buf = pop(ctx, &ctx->read_idx); - ctx->sz--; - } - return buf; -} - -struct lookahead_entry *av1_lookahead_peek(struct lookahead_ctx *ctx, - int index) { - 
struct lookahead_entry *buf = NULL; - - if (index >= 0) { - // Forward peek - if (index < ctx->sz) { - index += ctx->read_idx; - if (index >= ctx->max_sz) index -= ctx->max_sz; - buf = ctx->buf + index; - } - } else if (index < 0) { - // Backward peek - if (-index <= MAX_PRE_FRAMES) { - index += (int)(ctx->read_idx); - if (index < 0) index += (int)(ctx->max_sz); - buf = ctx->buf + index; - } - } - - return buf; -} - -unsigned int av1_lookahead_depth(struct lookahead_ctx *ctx) { return ctx->sz; } diff --git a/third_party/aom/av1/encoder/lookahead.h b/third_party/aom/av1/encoder/lookahead.h deleted file mode 100644 index e55224cf7..000000000 --- a/third_party/aom/av1/encoder/lookahead.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_ENCODER_LOOKAHEAD_H_ -#define AOM_AV1_ENCODER_LOOKAHEAD_H_ - -#include "aom_scale/yv12config.h" -#include "aom/aom_integer.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define MAX_LAG_BUFFERS 25 - -struct lookahead_entry { - YV12_BUFFER_CONFIG img; - int64_t ts_start; - int64_t ts_end; - aom_enc_frame_flags_t flags; -}; - -// The max of past frames we want to keep in the queue. 
-#define MAX_PRE_FRAMES 1 - -struct lookahead_ctx { - int max_sz; /* Absolute size of the queue */ - int sz; /* Number of buffers currently in the queue */ - int read_idx; /* Read index */ - int write_idx; /* Write index */ - struct lookahead_entry *buf; /* Buffer list */ -}; - -/**\brief Initializes the lookahead stage - * - * The lookahead stage is a queue of frame buffers on which some analysis - * may be done when buffers are enqueued. - */ -struct lookahead_ctx *av1_lookahead_init( - unsigned int width, unsigned int height, unsigned int subsampling_x, - unsigned int subsampling_y, int use_highbitdepth, unsigned int depth); - -/**\brief Destroys the lookahead stage - */ -void av1_lookahead_destroy(struct lookahead_ctx *ctx); - -/**\brief Enqueue a source buffer - * - * This function will copy the source image into a new framebuffer with - * the expected stride/border. - * - * If active_map is non-NULL and there is only one frame in the queue, then copy - * only active macroblocks. - * - * \param[in] ctx Pointer to the lookahead context - * \param[in] src Pointer to the image to enqueue - * \param[in] ts_start Timestamp for the start of this frame - * \param[in] ts_end Timestamp for the end of this frame - * \param[in] flags Flags set on this frame - * \param[in] active_map Map that specifies which macroblock is active - */ -int av1_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src, - int64_t ts_start, int64_t ts_end, int use_highbitdepth, - aom_enc_frame_flags_t flags); - -/**\brief Get the next source buffer to encode - * - * - * \param[in] ctx Pointer to the lookahead context - * \param[in] drain Flag indicating the buffer should be drained - * (return a buffer regardless of the current queue depth) - * - * \retval NULL, if drain set and queue is empty - * \retval NULL, if drain not set and queue not of the configured depth - */ -struct lookahead_entry *av1_lookahead_pop(struct lookahead_ctx *ctx, int drain); - -/**\brief Get a future source 
buffer to encode - * - * \param[in] ctx Pointer to the lookahead context - * \param[in] index Index of the frame to be returned, 0 == next frame - * - * \retval NULL, if no buffer exists at the specified index - */ -struct lookahead_entry *av1_lookahead_peek(struct lookahead_ctx *ctx, - int index); - -/**\brief Get the number of frames currently in the lookahead queue - * - * \param[in] ctx Pointer to the lookahead context - */ -unsigned int av1_lookahead_depth(struct lookahead_ctx *ctx); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_LOOKAHEAD_H_ diff --git a/third_party/aom/av1/encoder/mathutils.h b/third_party/aom/av1/encoder/mathutils.h deleted file mode 100644 index 64f936176..000000000 --- a/third_party/aom/av1/encoder/mathutils.h +++ /dev/null @@ -1,359 +0,0 @@ -/* - * Copyright (c) 2017, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_ENCODER_MATHUTILS_H_ -#define AOM_AV1_ENCODER_MATHUTILS_H_ - -#include -#include -#include -#include -#include - -static const double TINY_NEAR_ZERO = 1.0E-16; - -// Solves Ax = b, where x and b are column vectors of size nx1 and A is nxn -static INLINE int linsolve(int n, double *A, int stride, double *b, double *x) { - int i, j, k; - double c; - // Forward elimination - for (k = 0; k < n - 1; k++) { - // Bring the largest magnitude to the diagonal position - for (i = n - 1; i > k; i--) { - if (fabs(A[(i - 1) * stride + k]) < fabs(A[i * stride + k])) { - for (j = 0; j < n; j++) { - c = A[i * stride + j]; - A[i * stride + j] = A[(i - 1) * stride + j]; - A[(i - 1) * stride + j] = c; - } - c = b[i]; - b[i] = b[i - 1]; - b[i - 1] = c; - } - } - for (i = k; i < n - 1; i++) { - if (fabs(A[k * stride + k]) < TINY_NEAR_ZERO) return 0; - c = A[(i + 1) * stride + k] / A[k * stride + k]; - for (j = 0; j < n; j++) A[(i + 1) * stride + j] -= c * A[k * stride + j]; - b[i + 1] -= c * b[k]; - } - } - // Backward substitution - for (i = n - 1; i >= 0; i--) { - if (fabs(A[i * stride + i]) < TINY_NEAR_ZERO) return 0; - c = 0; - for (j = i + 1; j <= n - 1; j++) c += A[i * stride + j] * x[j]; - x[i] = (b[i] - c) / A[i * stride + i]; - } - - return 1; -} - -//////////////////////////////////////////////////////////////////////////////// -// Least-squares -// Solves for n-dim x in a least squares sense to minimize |Ax - b|^2 -// The solution is simply x = (A'A)^-1 A'b or simply the solution for -// the system: A'A x = A'b -static INLINE int least_squares(int n, double *A, int rows, int stride, - double *b, double *scratch, double *x) { - int i, j, k; - double *scratch_ = NULL; - double *AtA, *Atb; - if (!scratch) { - scratch_ = (double *)aom_malloc(sizeof(*scratch) * n * (n + 1)); - scratch = scratch_; - } - AtA = scratch; - Atb = scratch + n * n; - - for (i = 0; i < n; ++i) { - for (j = i; j < n; ++j) { - AtA[i * n + j] = 0.0; - for (k = 0; k < rows; ++k) - 
AtA[i * n + j] += A[k * stride + i] * A[k * stride + j]; - AtA[j * n + i] = AtA[i * n + j]; - } - Atb[i] = 0; - for (k = 0; k < rows; ++k) Atb[i] += A[k * stride + i] * b[k]; - } - int ret = linsolve(n, AtA, n, Atb, x); - if (scratch_) aom_free(scratch_); - return ret; -} - -// Matrix multiply -static INLINE void multiply_mat(const double *m1, const double *m2, double *res, - const int m1_rows, const int inner_dim, - const int m2_cols) { - double sum; - - int row, col, inner; - for (row = 0; row < m1_rows; ++row) { - for (col = 0; col < m2_cols; ++col) { - sum = 0; - for (inner = 0; inner < inner_dim; ++inner) - sum += m1[row * inner_dim + inner] * m2[inner * m2_cols + col]; - *(res++) = sum; - } - } -} - -// -// The functions below are needed only for homography computation -// Remove if the homography models are not used. -// -/////////////////////////////////////////////////////////////////////////////// -// svdcmp -// Adopted from Numerical Recipes in C - -static INLINE double sign(double a, double b) { - return ((b) >= 0 ? fabs(a) : -fabs(a)); -} - -static INLINE double pythag(double a, double b) { - double ct; - const double absa = fabs(a); - const double absb = fabs(b); - - if (absa > absb) { - ct = absb / absa; - return absa * sqrt(1.0 + ct * ct); - } else { - ct = absa / absb; - return (absb == 0) ? 0 : absb * sqrt(1.0 + ct * ct); - } -} - -static INLINE int svdcmp(double **u, int m, int n, double w[], double **v) { - const int max_its = 30; - int flag, i, its, j, jj, k, l, nm; - double anorm, c, f, g, h, s, scale, x, y, z; - double *rv1 = (double *)aom_malloc(sizeof(*rv1) * (n + 1)); - g = scale = anorm = 0.0; - for (i = 0; i < n; i++) { - l = i + 1; - rv1[i] = scale * g; - g = s = scale = 0.0; - if (i < m) { - for (k = i; k < m; k++) scale += fabs(u[k][i]); - if (scale != 0.) 
{ - for (k = i; k < m; k++) { - u[k][i] /= scale; - s += u[k][i] * u[k][i]; - } - f = u[i][i]; - g = -sign(sqrt(s), f); - h = f * g - s; - u[i][i] = f - g; - for (j = l; j < n; j++) { - for (s = 0.0, k = i; k < m; k++) s += u[k][i] * u[k][j]; - f = s / h; - for (k = i; k < m; k++) u[k][j] += f * u[k][i]; - } - for (k = i; k < m; k++) u[k][i] *= scale; - } - } - w[i] = scale * g; - g = s = scale = 0.0; - if (i < m && i != n - 1) { - for (k = l; k < n; k++) scale += fabs(u[i][k]); - if (scale != 0.) { - for (k = l; k < n; k++) { - u[i][k] /= scale; - s += u[i][k] * u[i][k]; - } - f = u[i][l]; - g = -sign(sqrt(s), f); - h = f * g - s; - u[i][l] = f - g; - for (k = l; k < n; k++) rv1[k] = u[i][k] / h; - for (j = l; j < m; j++) { - for (s = 0.0, k = l; k < n; k++) s += u[j][k] * u[i][k]; - for (k = l; k < n; k++) u[j][k] += s * rv1[k]; - } - for (k = l; k < n; k++) u[i][k] *= scale; - } - } - anorm = fmax(anorm, (fabs(w[i]) + fabs(rv1[i]))); - } - - for (i = n - 1; i >= 0; i--) { - if (i < n - 1) { - if (g != 0.) { - for (j = l; j < n; j++) v[j][i] = (u[i][j] / u[i][l]) / g; - for (j = l; j < n; j++) { - for (s = 0.0, k = l; k < n; k++) s += u[i][k] * v[k][j]; - for (k = l; k < n; k++) v[k][j] += s * v[k][i]; - } - } - for (j = l; j < n; j++) v[i][j] = v[j][i] = 0.0; - } - v[i][i] = 1.0; - g = rv1[i]; - l = i; - } - for (i = AOMMIN(m, n) - 1; i >= 0; i--) { - l = i + 1; - g = w[i]; - for (j = l; j < n; j++) u[i][j] = 0.0; - if (g != 0.) 
{ - g = 1.0 / g; - for (j = l; j < n; j++) { - for (s = 0.0, k = l; k < m; k++) s += u[k][i] * u[k][j]; - f = (s / u[i][i]) * g; - for (k = i; k < m; k++) u[k][j] += f * u[k][i]; - } - for (j = i; j < m; j++) u[j][i] *= g; - } else { - for (j = i; j < m; j++) u[j][i] = 0.0; - } - ++u[i][i]; - } - for (k = n - 1; k >= 0; k--) { - for (its = 0; its < max_its; its++) { - flag = 1; - for (l = k; l >= 0; l--) { - nm = l - 1; - if ((double)(fabs(rv1[l]) + anorm) == anorm || nm < 0) { - flag = 0; - break; - } - if ((double)(fabs(w[nm]) + anorm) == anorm) break; - } - if (flag) { - c = 0.0; - s = 1.0; - for (i = l; i <= k; i++) { - f = s * rv1[i]; - rv1[i] = c * rv1[i]; - if ((double)(fabs(f) + anorm) == anorm) break; - g = w[i]; - h = pythag(f, g); - w[i] = h; - h = 1.0 / h; - c = g * h; - s = -f * h; - for (j = 0; j < m; j++) { - y = u[j][nm]; - z = u[j][i]; - u[j][nm] = y * c + z * s; - u[j][i] = z * c - y * s; - } - } - } - z = w[k]; - if (l == k) { - if (z < 0.0) { - w[k] = -z; - for (j = 0; j < n; j++) v[j][k] = -v[j][k]; - } - break; - } - if (its == max_its - 1) { - aom_free(rv1); - return 1; - } - assert(k > 0); - x = w[l]; - nm = k - 1; - y = w[nm]; - g = rv1[nm]; - h = rv1[k]; - f = ((y - z) * (y + z) + (g - h) * (g + h)) / (2.0 * h * y); - g = pythag(f, 1.0); - f = ((x - z) * (x + z) + h * ((y / (f + sign(g, f))) - h)) / x; - c = s = 1.0; - for (j = l; j <= nm; j++) { - i = j + 1; - g = rv1[i]; - y = w[i]; - h = s * g; - g = c * g; - z = pythag(f, h); - rv1[j] = z; - c = f / z; - s = h / z; - f = x * c + g * s; - g = g * c - x * s; - h = y * s; - y *= c; - for (jj = 0; jj < n; jj++) { - x = v[jj][j]; - z = v[jj][i]; - v[jj][j] = x * c + z * s; - v[jj][i] = z * c - x * s; - } - z = pythag(f, h); - w[j] = z; - if (z != 0.) 
{ - z = 1.0 / z; - c = f * z; - s = h * z; - } - f = c * g + s * y; - x = c * y - s * g; - for (jj = 0; jj < m; jj++) { - y = u[jj][j]; - z = u[jj][i]; - u[jj][j] = y * c + z * s; - u[jj][i] = z * c - y * s; - } - } - rv1[l] = 0.0; - rv1[k] = f; - w[k] = x; - } - } - aom_free(rv1); - return 0; -} - -static INLINE int SVD(double *U, double *W, double *V, double *matx, int M, - int N) { - // Assumes allocation for U is MxN - double **nrU = (double **)aom_malloc((M) * sizeof(*nrU)); - double **nrV = (double **)aom_malloc((N) * sizeof(*nrV)); - int problem, i; - - problem = !(nrU && nrV); - if (!problem) { - for (i = 0; i < M; i++) { - nrU[i] = &U[i * N]; - } - for (i = 0; i < N; i++) { - nrV[i] = &V[i * N]; - } - } else { - if (nrU) aom_free(nrU); - if (nrV) aom_free(nrV); - return 1; - } - - /* copy from given matx into nrU */ - for (i = 0; i < M; i++) { - memcpy(&(nrU[i][0]), matx + N * i, N * sizeof(*matx)); - } - - /* HERE IT IS: do SVD */ - if (svdcmp(nrU, M, N, W, nrV)) { - aom_free(nrU); - aom_free(nrV); - return 1; - } - - /* aom_free Numerical Recipes arrays */ - aom_free(nrU); - aom_free(nrV); - - return 0; -} - -#endif // AOM_AV1_ENCODER_MATHUTILS_H_ diff --git a/third_party/aom/av1/encoder/mbgraph.c b/third_party/aom/av1/encoder/mbgraph.c deleted file mode 100644 index 1a35ff77c..000000000 --- a/third_party/aom/av1/encoder/mbgraph.c +++ /dev/null @@ -1,401 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include - -#include "config/av1_rtcd.h" -#include "config/aom_dsp_rtcd.h" - -#include "aom_dsp/aom_dsp_common.h" -#include "aom_mem/aom_mem.h" -#include "aom_ports/system_state.h" -#include "av1/common/blockd.h" -#include "av1/common/reconinter.h" -#include "av1/common/reconintra.h" -#include "av1/encoder/mcomp.h" -#include "av1/encoder/reconinter_enc.h" -#include "av1/encoder/segmentation.h" - -static unsigned int do_16x16_motion_iteration(AV1_COMP *cpi, const MV *ref_mv, - int mb_row, int mb_col) { - MACROBLOCK *const x = &cpi->td.mb; - MACROBLOCKD *const xd = &x->e_mbd; - const MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv; - const aom_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16]; - - const MvLimits tmp_mv_limits = x->mv_limits; - MV ref_full; - int cost_list[5]; - - // Further step/diamond searches as necessary - int step_param = mv_sf->reduce_first_step_size; - step_param = AOMMIN(step_param, MAX_MVSEARCH_STEPS - 2); - - av1_set_mv_search_range(&x->mv_limits, ref_mv); - - ref_full.col = ref_mv->col >> 3; - ref_full.row = ref_mv->row >> 3; - - /*cpi->sf.search_method == HEX*/ - av1_hex_search(x, &ref_full, step_param, x->errorperbit, 0, - cond_cost_list(cpi, cost_list), &v_fn_ptr, 0, ref_mv); - - // Try sub-pixel MC - // if (bestsme > error_thresh && bestsme < INT_MAX) - if (cpi->common.cur_frame_force_integer_mv == 1) { - x->best_mv.as_mv.row *= 8; - x->best_mv.as_mv.col *= 8; - } else { - int distortion; - unsigned int sse; - cpi->find_fractional_mv_step( - x, &cpi->common, mb_row, mb_col, ref_mv, - cpi->common.allow_high_precision_mv, x->errorperbit, &v_fn_ptr, 0, - mv_sf->subpel_iters_per_step, cond_cost_list(cpi, cost_list), NULL, - NULL, &distortion, &sse, NULL, NULL, 0, 0, 0, 0, 0); - } - - if (has_second_ref(xd->mi[0])) - xd->mi[0]->mode = NEW_NEWMV; - else - xd->mi[0]->mode = NEWMV; - - xd->mi[0]->mv[0] = x->best_mv; - xd->mi[0]->ref_frame[1] = NONE_FRAME; - - av1_build_inter_predictors_sby(&cpi->common, xd, mb_row, mb_col, NULL, - 
BLOCK_16X16); - - /* restore UMV window */ - x->mv_limits = tmp_mv_limits; - - return aom_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, - xd->plane[0].dst.buf, xd->plane[0].dst.stride); -} - -static int do_16x16_motion_search(AV1_COMP *cpi, const MV *ref_mv, int mb_row, - int mb_col) { - MACROBLOCK *const x = &cpi->td.mb; - MACROBLOCKD *const xd = &x->e_mbd; - unsigned int err, tmp_err; - MV best_mv; - - // Try zero MV first - // FIXME should really use something like near/nearest MV and/or MV prediction - err = aom_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, - xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride); - best_mv.col = best_mv.row = 0; - - // Test last reference frame using the previous best mv as the - // starting point (best reference) for the search - tmp_err = do_16x16_motion_iteration(cpi, ref_mv, mb_row, mb_col); - if (tmp_err < err) { - err = tmp_err; - best_mv = x->best_mv.as_mv; - } - - // If the current best reference mv is not centered on 0,0 then do a 0,0 - // based search as well. 
- if (ref_mv->row != 0 || ref_mv->col != 0) { - MV zero_ref_mv = kZeroMv; - - tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv, mb_row, mb_col); - if (tmp_err < err) { - err = tmp_err; - best_mv = x->best_mv.as_mv; - } - } - - x->best_mv.as_mv = best_mv; - return err; -} - -static int do_16x16_zerozero_search(AV1_COMP *cpi, int_mv *dst_mv) { - MACROBLOCK *const x = &cpi->td.mb; - MACROBLOCKD *const xd = &x->e_mbd; - unsigned int err; - - // Try zero MV first - // FIXME should really use something like near/nearest MV and/or MV prediction - err = aom_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, - xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride); - - dst_mv->as_int = 0; - - return err; -} -static int find_best_16x16_intra(AV1_COMP *cpi, PREDICTION_MODE *pbest_mode) { - const AV1_COMMON *cm = &cpi->common; - MACROBLOCK *const x = &cpi->td.mb; - MACROBLOCKD *const xd = &x->e_mbd; - PREDICTION_MODE best_mode = -1, mode; - unsigned int best_err = INT_MAX; - - // calculate SATD for each intra prediction mode; - // we're intentionally not doing 4x4, we just want a rough estimate - for (mode = INTRA_MODE_START; mode < INTRA_MODE_END; mode++) { - unsigned int err; - - xd->mi[0]->mode = mode; - av1_predict_intra_block(cm, xd, 16, 16, TX_16X16, mode, 0, 0, - FILTER_INTRA_MODES, x->plane[0].src.buf, - x->plane[0].src.stride, xd->plane[0].dst.buf, - xd->plane[0].dst.stride, 0, 0, 0); - err = aom_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, - xd->plane[0].dst.buf, xd->plane[0].dst.stride); - - // find best - if (err < best_err) { - best_err = err; - best_mode = mode; - } - } - - if (pbest_mode) *pbest_mode = best_mode; - - return best_err; -} - -static void update_mbgraph_mb_stats(AV1_COMP *cpi, MBGRAPH_MB_STATS *stats, - YV12_BUFFER_CONFIG *buf, int mb_y_offset, - YV12_BUFFER_CONFIG *golden_ref, - const MV *prev_golden_ref_mv, - YV12_BUFFER_CONFIG *alt_ref, int mb_row, - int mb_col) { - MACROBLOCK *const x = &cpi->td.mb; - MACROBLOCKD *const xd = 
&x->e_mbd; - int intra_error; - AV1_COMMON *cm = &cpi->common; - - // FIXME in practice we're completely ignoring chroma here - x->plane[0].src.buf = buf->y_buffer + mb_y_offset; - x->plane[0].src.stride = buf->y_stride; - - xd->plane[0].dst.buf = get_frame_new_buffer(cm)->y_buffer + mb_y_offset; - xd->plane[0].dst.stride = get_frame_new_buffer(cm)->y_stride; - - // do intra 16x16 prediction - intra_error = find_best_16x16_intra(cpi, &stats->ref[INTRA_FRAME].m.mode); - if (intra_error <= 0) intra_error = 1; - stats->ref[INTRA_FRAME].err = intra_error; - - // Golden frame MV search, if it exists and is different than last frame - if (golden_ref) { - int g_motion_error; - xd->plane[0].pre[0].buf = golden_ref->y_buffer + mb_y_offset; - xd->plane[0].pre[0].stride = golden_ref->y_stride; - g_motion_error = - do_16x16_motion_search(cpi, prev_golden_ref_mv, mb_row, mb_col); - stats->ref[GOLDEN_FRAME].m.mv = x->best_mv; - stats->ref[GOLDEN_FRAME].err = g_motion_error; - } else { - stats->ref[GOLDEN_FRAME].err = INT_MAX; - stats->ref[GOLDEN_FRAME].m.mv.as_int = 0; - } - - // Do an Alt-ref frame MV search, if it exists and is different than - // last/golden frame. 
- if (alt_ref) { - int a_motion_error; - xd->plane[0].pre[0].buf = alt_ref->y_buffer + mb_y_offset; - xd->plane[0].pre[0].stride = alt_ref->y_stride; - a_motion_error = - do_16x16_zerozero_search(cpi, &stats->ref[ALTREF_FRAME].m.mv); - - stats->ref[ALTREF_FRAME].err = a_motion_error; - } else { - stats->ref[ALTREF_FRAME].err = INT_MAX; - stats->ref[ALTREF_FRAME].m.mv.as_int = 0; - } -} - -static void update_mbgraph_frame_stats(AV1_COMP *cpi, - MBGRAPH_FRAME_STATS *stats, - YV12_BUFFER_CONFIG *buf, - YV12_BUFFER_CONFIG *golden_ref, - YV12_BUFFER_CONFIG *alt_ref) { - MACROBLOCK *const x = &cpi->td.mb; - MACROBLOCKD *const xd = &x->e_mbd; - AV1_COMMON *const cm = &cpi->common; - - int mb_col, mb_row, offset = 0; - int mb_y_offset = 0, arf_y_offset = 0, gld_y_offset = 0; - MV gld_top_mv = kZeroMv; - MB_MODE_INFO mi_local; - - av1_zero(mi_local); - // Set up limit values for motion vectors to prevent them extending outside - // the UMV borders. - x->mv_limits.row_min = -BORDER_MV_PIXELS_B16; - x->mv_limits.row_max = (cm->mb_rows - 1) * 8 + BORDER_MV_PIXELS_B16; - xd->up_available = 0; - xd->plane[0].dst.stride = buf->y_stride; - xd->plane[0].pre[0].stride = buf->y_stride; - xd->plane[1].dst.stride = buf->uv_stride; - xd->mi[0] = &mi_local; - mi_local.sb_type = BLOCK_16X16; - mi_local.ref_frame[0] = LAST_FRAME; - mi_local.ref_frame[1] = NONE_FRAME; - - for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { - MV gld_left_mv = gld_top_mv; - int mb_y_in_offset = mb_y_offset; - int arf_y_in_offset = arf_y_offset; - int gld_y_in_offset = gld_y_offset; - - // Set up limit values for motion vectors to prevent them extending outside - // the UMV borders. 
- x->mv_limits.col_min = -BORDER_MV_PIXELS_B16; - x->mv_limits.col_max = (cm->mb_cols - 1) * 8 + BORDER_MV_PIXELS_B16; - xd->left_available = 0; - - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { - MBGRAPH_MB_STATS *mb_stats = &stats->mb_stats[offset + mb_col]; - - update_mbgraph_mb_stats(cpi, mb_stats, buf, mb_y_in_offset, golden_ref, - &gld_left_mv, alt_ref, mb_row, mb_col); - gld_left_mv = mb_stats->ref[GOLDEN_FRAME].m.mv.as_mv; - if (mb_col == 0) { - gld_top_mv = gld_left_mv; - } - xd->left_available = 1; - mb_y_in_offset += 16; - gld_y_in_offset += 16; - arf_y_in_offset += 16; - x->mv_limits.col_min -= 16; - x->mv_limits.col_max -= 16; - } - xd->up_available = 1; - mb_y_offset += buf->y_stride * 16; - gld_y_offset += golden_ref->y_stride * 16; - if (alt_ref) arf_y_offset += alt_ref->y_stride * 16; - x->mv_limits.row_min -= 16; - x->mv_limits.row_max -= 16; - offset += cm->mb_cols; - } -} - -// void separate_arf_mbs_byzz -static void separate_arf_mbs(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - int mb_col, mb_row, offset, i; - int mi_row, mi_col; - int ncnt[4] = { 0 }; - int n_frames = cpi->mbgraph_n_frames; - - int *arf_not_zz; - - CHECK_MEM_ERROR( - cm, arf_not_zz, - aom_calloc(cm->mb_rows * cm->mb_cols * sizeof(*arf_not_zz), 1)); - - // We are not interested in results beyond the alt ref itself. 
- if (n_frames > cpi->rc.frames_till_gf_update_due) - n_frames = cpi->rc.frames_till_gf_update_due; - - // defer cost to reference frames - for (i = n_frames - 1; i >= 0; i--) { - MBGRAPH_FRAME_STATS *frame_stats = &cpi->mbgraph_stats[i]; - - for (offset = 0, mb_row = 0; mb_row < cm->mb_rows; - offset += cm->mb_cols, mb_row++) { - for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { - MBGRAPH_MB_STATS *mb_stats = &frame_stats->mb_stats[offset + mb_col]; - - int altref_err = mb_stats->ref[ALTREF_FRAME].err; - int intra_err = mb_stats->ref[INTRA_FRAME].err; - int golden_err = mb_stats->ref[GOLDEN_FRAME].err; - - // Test for altref vs intra and gf and that its mv was 0,0. - if (altref_err > 1000 || altref_err > intra_err || - altref_err > golden_err) { - arf_not_zz[offset + mb_col]++; - } - } - } - } - - // arf_not_zz is indexed by MB, but this loop is indexed by MI to avoid out - // of bound access in segmentation_map - for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) { - for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) { - // If any of the blocks in the sequence failed then the MB - // goes in segment 0 - if (arf_not_zz[mi_row / 2 * cm->mb_cols + mi_col / 2]) { - ncnt[0]++; - cpi->segmentation_map[mi_row * cm->mi_cols + mi_col] = 0; - } else { - cpi->segmentation_map[mi_row * cm->mi_cols + mi_col] = 1; - ncnt[1]++; - } - } - } - - // Only bother with segmentation if over 10% of the MBs in static segment - // if ( ncnt[1] && (ncnt[0] / ncnt[1] < 10) ) - if (1) { - // Note % of blocks that are marked as static - if (cm->MBs) - cpi->static_mb_pct = (ncnt[1] * 100) / (cm->mi_rows * cm->mi_cols); - - // This error case should not be reachable as this function should - // never be called with the common data structure uninitialized. 
- else - cpi->static_mb_pct = 0; - - av1_enable_segmentation(&cm->seg); - } else { - cpi->static_mb_pct = 0; - av1_disable_segmentation(&cm->seg); - } - - // Free localy allocated storage - aom_free(arf_not_zz); -} - -void av1_update_mbgraph_stats(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - int i, n_frames = av1_lookahead_depth(cpi->lookahead); - YV12_BUFFER_CONFIG *golden_ref = get_ref_frame_buffer(cpi, GOLDEN_FRAME); - - assert(golden_ref != NULL); - - // we need to look ahead beyond where the ARF transitions into - // being a GF - so exit if we don't look ahead beyond that - if (n_frames <= cpi->rc.frames_till_gf_update_due) return; - - if (n_frames > MAX_LAG_BUFFERS) n_frames = MAX_LAG_BUFFERS; - - cpi->mbgraph_n_frames = n_frames; - for (i = 0; i < n_frames; i++) { - MBGRAPH_FRAME_STATS *frame_stats = &cpi->mbgraph_stats[i]; - memset(frame_stats->mb_stats, 0, - cm->mb_rows * cm->mb_cols * sizeof(*cpi->mbgraph_stats[i].mb_stats)); - } - - // do motion search to find contribution of each reference to data - // later on in this GF group - // FIXME really, the GF/last MC search should be done forward, and - // the ARF MC search backwards, to get optimal results for MV caching - for (i = 0; i < n_frames; i++) { - MBGRAPH_FRAME_STATS *frame_stats = &cpi->mbgraph_stats[i]; - struct lookahead_entry *q_cur = av1_lookahead_peek(cpi->lookahead, i); - - assert(q_cur != NULL); - - update_mbgraph_frame_stats(cpi, frame_stats, &q_cur->img, golden_ref, - cpi->source); - } - - aom_clear_system_state(); - - separate_arf_mbs(cpi); -} diff --git a/third_party/aom/av1/encoder/mbgraph.h b/third_party/aom/av1/encoder/mbgraph.h deleted file mode 100644 index ba08476f7..000000000 --- a/third_party/aom/av1/encoder/mbgraph.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_ENCODER_MBGRAPH_H_ -#define AOM_AV1_ENCODER_MBGRAPH_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - struct { - int err; - union { - int_mv mv; - PREDICTION_MODE mode; - } m; - } ref[REF_FRAMES]; -} MBGRAPH_MB_STATS; - -typedef struct { - MBGRAPH_MB_STATS *mb_stats; -} MBGRAPH_FRAME_STATS; - -struct AV1_COMP; - -void av1_update_mbgraph_stats(struct AV1_COMP *cpi); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_MBGRAPH_H_ diff --git a/third_party/aom/av1/encoder/mcomp.c b/third_party/aom/av1/encoder/mcomp.c deleted file mode 100644 index 8f6de9b53..000000000 --- a/third_party/aom/av1/encoder/mcomp.c +++ /dev/null @@ -1,2885 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include -#include -#include - -#include "config/aom_config.h" -#include "config/aom_dsp_rtcd.h" - -#include "aom_dsp/aom_dsp_common.h" -#include "aom_mem/aom_mem.h" -#include "aom_ports/mem.h" - -#include "av1/common/common.h" -#include "av1/common/mvref_common.h" -#include "av1/common/onyxc_int.h" -#include "av1/common/reconinter.h" - -#include "av1/encoder/encoder.h" -#include "av1/encoder/encodemv.h" -#include "av1/encoder/mcomp.h" -#include "av1/encoder/rdopt.h" -#include "av1/encoder/reconinter_enc.h" - -// #define NEW_DIAMOND_SEARCH - -static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf, - const MV *mv) { - return &buf->buf[mv->row * buf->stride + mv->col]; -} - -void av1_set_mv_search_range(MvLimits *mv_limits, const MV *mv) { - int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0); - int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0); - int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL; - int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL; - - col_min = AOMMAX(col_min, (MV_LOW >> 3) + 1); - row_min = AOMMAX(row_min, (MV_LOW >> 3) + 1); - col_max = AOMMIN(col_max, (MV_UPP >> 3) - 1); - row_max = AOMMIN(row_max, (MV_UPP >> 3) - 1); - - // Get intersection of UMV window and valid MV window to reduce # of checks - // in diamond search. 
- if (mv_limits->col_min < col_min) mv_limits->col_min = col_min; - if (mv_limits->col_max > col_max) mv_limits->col_max = col_max; - if (mv_limits->row_min < row_min) mv_limits->row_min = row_min; - if (mv_limits->row_max > row_max) mv_limits->row_max = row_max; -} - -static void set_subpel_mv_search_range(const MvLimits *mv_limits, int *col_min, - int *col_max, int *row_min, int *row_max, - const MV *ref_mv) { - const int max_mv = MAX_FULL_PEL_VAL * 8; - const int minc = AOMMAX(mv_limits->col_min * 8, ref_mv->col - max_mv); - const int maxc = AOMMIN(mv_limits->col_max * 8, ref_mv->col + max_mv); - const int minr = AOMMAX(mv_limits->row_min * 8, ref_mv->row - max_mv); - const int maxr = AOMMIN(mv_limits->row_max * 8, ref_mv->row + max_mv); - - *col_min = AOMMAX(MV_LOW + 1, minc); - *col_max = AOMMIN(MV_UPP - 1, maxc); - *row_min = AOMMAX(MV_LOW + 1, minr); - *row_max = AOMMIN(MV_UPP - 1, maxr); -} - -int av1_init_search_range(int size) { - int sr = 0; - // Minimum search size no matter what the passed in value. 
- size = AOMMAX(16, size); - - while ((size << sr) < MAX_FULL_PEL_VAL) sr++; - - sr = AOMMIN(sr, MAX_MVSEARCH_STEPS - 2); - return sr; -} - -static INLINE int mv_cost(const MV *mv, const int *joint_cost, - int *const comp_cost[2]) { - return joint_cost[av1_get_mv_joint(mv)] + comp_cost[0][mv->row] + - comp_cost[1][mv->col]; -} - -int av1_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost, - int *mvcost[2], int weight) { - const MV diff = { mv->row - ref->row, mv->col - ref->col }; - return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7); -} - -#define PIXEL_TRANSFORM_ERROR_SCALE 4 -static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost, - int *mvcost[2], int error_per_bit) { - if (mvcost) { - const MV diff = { mv->row - ref->row, mv->col - ref->col }; - return (int)ROUND_POWER_OF_TWO_64( - (int64_t)mv_cost(&diff, mvjcost, mvcost) * error_per_bit, - RDDIV_BITS + AV1_PROB_COST_SHIFT - RD_EPB_SHIFT + - PIXEL_TRANSFORM_ERROR_SCALE); - } - return 0; -} - -static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref, - int sad_per_bit) { - const MV diff = { (mv->row - ref->row) * 8, (mv->col - ref->col) * 8 }; - return ROUND_POWER_OF_TWO( - (unsigned)mv_cost(&diff, x->nmvjointcost, x->mvcost) * sad_per_bit, - AV1_PROB_COST_SHIFT); -} - -void av1_init_dsmotion_compensation(search_site_config *cfg, int stride) { - int len, ss_count = 1; - - cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0; - cfg->ss[0].offset = 0; - - for (len = MAX_FIRST_STEP; len > 0; len /= 2) { - // Generate offsets for 4 search sites per step. 
- const MV ss_mvs[] = { { -len, 0 }, { len, 0 }, { 0, -len }, { 0, len } }; - int i; - for (i = 0; i < 4; ++i) { - search_site *const ss = &cfg->ss[ss_count++]; - ss->mv = ss_mvs[i]; - ss->offset = ss->mv.row * stride + ss->mv.col; - } - } - - cfg->ss_count = ss_count; - cfg->searches_per_step = 4; -} - -void av1_init3smotion_compensation(search_site_config *cfg, int stride) { - int len, ss_count = 1; - - cfg->ss[0].mv.col = cfg->ss[0].mv.row = 0; - cfg->ss[0].offset = 0; - - for (len = MAX_FIRST_STEP; len > 0; len /= 2) { - // Generate offsets for 8 search sites per step. - const MV ss_mvs[8] = { { -len, 0 }, { len, 0 }, { 0, -len }, - { 0, len }, { -len, -len }, { -len, len }, - { len, -len }, { len, len } }; - int i; - for (i = 0; i < 8; ++i) { - search_site *const ss = &cfg->ss[ss_count++]; - ss->mv = ss_mvs[i]; - ss->offset = ss->mv.row * stride + ss->mv.col; - } - } - - cfg->ss_count = ss_count; - cfg->searches_per_step = 8; -} - -/* - * To avoid the penalty for crossing cache-line read, preload the reference - * area in a small buffer, which is aligned to make sure there won't be crossing - * cache-line read while reading from this buffer. This reduced the cpu - * cycles spent on reading ref data in sub-pixel filter functions. - * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x - * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we - * could reduce the area. 
- */ - -// convert motion vector component to offset for sv[a]f calc -static INLINE int sp(int x) { return x & 7; } - -static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { - const int offset = (r >> 3) * stride + (c >> 3); - return buf + offset; -} - -/* checks if (r, c) has better score than previous best */ -#define CHECK_BETTER(v, r, c) \ - if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ - MV this_mv = { r, c }; \ - v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \ - if (second_pred == NULL) { \ - thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \ - src_address, src_stride, &sse); \ - } else if (mask) { \ - thismse = vfp->msvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \ - src_address, src_stride, second_pred, mask, \ - mask_stride, invert_mask, &sse); \ - } else { \ - if (xd->jcp_param.use_jnt_comp_avg) \ - thismse = vfp->jsvaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \ - src_address, src_stride, &sse, second_pred, \ - &xd->jcp_param); \ - else \ - thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), \ - src_address, src_stride, &sse, second_pred); \ - } \ - v += thismse; \ - if (v < besterr) { \ - besterr = v; \ - br = r; \ - bc = c; \ - *distortion = thismse; \ - *sse1 = sse; \ - } \ - } else { \ - v = INT_MAX; \ - } - -#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c) - -/* checks if (r, c) has better score than previous best */ -#define CHECK_BETTER1(v, r, c) \ - if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ - MV this_mv = { r, c }; \ - thismse = upsampled_pref_error( \ - xd, cm, mi_row, mi_col, &this_mv, vfp, src_address, src_stride, \ - pre(y, y_stride, r, c), y_stride, sp(c), sp(r), second_pred, mask, \ - mask_stride, invert_mask, w, h, &sse, use_accurate_subpel_search); \ - v = mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); \ - v += thismse; \ - if (v < besterr) { \ - besterr = v; \ - br = r; \ - bc = c; \ - *distortion = 
thismse; \ - *sse1 = sse; \ - } \ - } else { \ - v = INT_MAX; \ - } - -#define FIRST_LEVEL_CHECKS \ - { \ - unsigned int left, right, up, down, diag; \ - CHECK_BETTER(left, tr, tc - hstep); \ - CHECK_BETTER(right, tr, tc + hstep); \ - CHECK_BETTER(up, tr - hstep, tc); \ - CHECK_BETTER(down, tr + hstep, tc); \ - whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); \ - switch (whichdir) { \ - case 0: CHECK_BETTER(diag, tr - hstep, tc - hstep); break; \ - case 1: CHECK_BETTER(diag, tr - hstep, tc + hstep); break; \ - case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; \ - case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; \ - } \ - } - -#define SECOND_LEVEL_CHECKS \ - { \ - int kr, kc; \ - unsigned int second; \ - if (tr != br && tc != bc) { \ - kr = br - tr; \ - kc = bc - tc; \ - CHECK_BETTER(second, tr + kr, tc + 2 * kc); \ - CHECK_BETTER(second, tr + 2 * kr, tc + kc); \ - } else if (tr == br && tc != bc) { \ - kc = bc - tc; \ - CHECK_BETTER(second, tr + hstep, tc + 2 * kc); \ - CHECK_BETTER(second, tr - hstep, tc + 2 * kc); \ - switch (whichdir) { \ - case 0: \ - case 1: CHECK_BETTER(second, tr + hstep, tc + kc); break; \ - case 2: \ - case 3: CHECK_BETTER(second, tr - hstep, tc + kc); break; \ - } \ - } else if (tr != br && tc == bc) { \ - kr = br - tr; \ - CHECK_BETTER(second, tr + 2 * kr, tc + hstep); \ - CHECK_BETTER(second, tr + 2 * kr, tc - hstep); \ - switch (whichdir) { \ - case 0: \ - case 2: CHECK_BETTER(second, tr + kr, tc + hstep); break; \ - case 1: \ - case 3: CHECK_BETTER(second, tr + kr, tc - hstep); break; \ - } \ - } \ - } - -// TODO(yunqingwang): SECOND_LEVEL_CHECKS_BEST was a rewrote of -// SECOND_LEVEL_CHECKS, and SECOND_LEVEL_CHECKS should be rewritten -// later in the same way. 
-#define SECOND_LEVEL_CHECKS_BEST(k) \ - { \ - unsigned int second; \ - int br0 = br; \ - int bc0 = bc; \ - assert(tr == br || tc == bc); \ - if (tr == br && tc != bc) { \ - kc = bc - tc; \ - } else if (tr != br && tc == bc) { \ - kr = br - tr; \ - } \ - CHECK_BETTER##k(second, br0 + kr, bc0); \ - CHECK_BETTER##k(second, br0, bc0 + kc); \ - if (br0 != br || bc0 != bc) { \ - CHECK_BETTER##k(second, br0 + kr, bc0 + kc); \ - } \ - } - -#define SETUP_SUBPEL_SEARCH \ - const uint8_t *const src_address = x->plane[0].src.buf; \ - const int src_stride = x->plane[0].src.stride; \ - const MACROBLOCKD *xd = &x->e_mbd; \ - unsigned int besterr = INT_MAX; \ - unsigned int sse; \ - unsigned int whichdir; \ - int thismse; \ - MV *bestmv = &x->best_mv.as_mv; \ - const unsigned int halfiters = iters_per_step; \ - const unsigned int quarteriters = iters_per_step; \ - const unsigned int eighthiters = iters_per_step; \ - const int y_stride = xd->plane[0].pre[0].stride; \ - const int offset = bestmv->row * y_stride + bestmv->col; \ - const uint8_t *const y = xd->plane[0].pre[0].buf; \ - \ - int br = bestmv->row * 8; \ - int bc = bestmv->col * 8; \ - int hstep = 4; \ - int minc, maxc, minr, maxr; \ - int tr = br; \ - int tc = bc; \ - \ - set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr, \ - ref_mv); \ - \ - bestmv->row *= 8; \ - bestmv->col *= 8; - -static unsigned int setup_center_error( - const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv, - int error_per_bit, const aom_variance_fn_ptr_t *vfp, - const uint8_t *const src, const int src_stride, const uint8_t *const y, - int y_stride, const uint8_t *second_pred, const uint8_t *mask, - int mask_stride, int invert_mask, int w, int h, int offset, int *mvjcost, - int *mvcost[2], unsigned int *sse1, int *distortion) { - unsigned int besterr; - if (second_pred != NULL) { - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - DECLARE_ALIGNED(16, uint16_t, comp_pred16[MAX_SB_SQUARE]); - uint8_t *comp_pred = 
CONVERT_TO_BYTEPTR(comp_pred16); - if (mask) { - aom_highbd_comp_mask_pred(comp_pred, second_pred, w, h, y + offset, - y_stride, mask, mask_stride, invert_mask); - } else { - if (xd->jcp_param.use_jnt_comp_avg) - aom_highbd_jnt_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, - y_stride, &xd->jcp_param); - else - aom_highbd_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, - y_stride); - } - besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); - } else { - DECLARE_ALIGNED(16, uint8_t, comp_pred[MAX_SB_SQUARE]); - if (mask) { - aom_comp_mask_pred(comp_pred, second_pred, w, h, y + offset, y_stride, - mask, mask_stride, invert_mask); - } else { - if (xd->jcp_param.use_jnt_comp_avg) - aom_jnt_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, - y_stride, &xd->jcp_param); - else - aom_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); - } - besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); - } - } else { - besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1); - } - *distortion = besterr; - besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); - return besterr; -} - -static INLINE int divide_and_round(int n, int d) { - return ((n < 0) ^ (d < 0)) ? ((n - d / 2) / d) : ((n + d / 2) / d); -} - -static INLINE int is_cost_list_wellbehaved(int *cost_list) { - return cost_list[0] < cost_list[1] && cost_list[0] < cost_list[2] && - cost_list[0] < cost_list[3] && cost_list[0] < cost_list[4]; -} - -// Returns surface minima estimate at given precision in 1/2^n bits. -// Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C -// For a given set of costs S0, S1, S2, S3, S4 at points -// (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively, -// the solution for the location of the minima (x0, y0) is given by: -// x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0), -// y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0). -// The code below is an integerized version of that. 
-static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) { - *ic = divide_and_round((cost_list[1] - cost_list[3]) * (1 << (bits - 1)), - (cost_list[1] - 2 * cost_list[0] + cost_list[3])); - *ir = divide_and_round((cost_list[4] - cost_list[2]) * (1 << (bits - 1)), - (cost_list[4] - 2 * cost_list[0] + cost_list[2])); -} - -int av1_find_best_sub_pixel_tree_pruned_evenmore( - MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col, - const MV *ref_mv, int allow_hp, int error_per_bit, - const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, - int *cost_list, int *mvjcost, int *mvcost[2], int *distortion, - unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask, - int mask_stride, int invert_mask, int w, int h, - int use_accurate_subpel_search) { - SETUP_SUBPEL_SEARCH; - besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, - src_address, src_stride, y, y_stride, - second_pred, mask, mask_stride, invert_mask, w, - h, offset, mvjcost, mvcost, sse1, distortion); - (void)halfiters; - (void)quarteriters; - (void)eighthiters; - (void)whichdir; - (void)allow_hp; - (void)forced_stop; - (void)hstep; - (void)use_accurate_subpel_search; - (void)cm; - (void)mi_row; - (void)mi_col; - - if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && - cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && - cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) { - int ir, ic; - unsigned int minpt; - get_cost_surf_min(cost_list, &ir, &ic, 2); - if (ir != 0 || ic != 0) { - CHECK_BETTER(minpt, tr + 2 * ir, tc + 2 * ic); - } - } else { - FIRST_LEVEL_CHECKS; - if (halfiters > 1) { - SECOND_LEVEL_CHECKS; - } - - tr = br; - tc = bc; - - // Each subsequent iteration checks at least one point in common with - // the last iteration could be 2 ( if diag selected) 1/4 pel - // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only - if (forced_stop != 2) { - hstep >>= 1; - FIRST_LEVEL_CHECKS; - if 
(quarteriters > 1) { - SECOND_LEVEL_CHECKS; - } - } - } - - tr = br; - tc = bc; - - if (allow_hp && forced_stop == 0) { - hstep >>= 1; - FIRST_LEVEL_CHECKS; - if (eighthiters > 1) { - SECOND_LEVEL_CHECKS; - } - } - - bestmv->row = br; - bestmv->col = bc; - - return besterr; -} - -int av1_find_best_sub_pixel_tree_pruned_more( - MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col, - const MV *ref_mv, int allow_hp, int error_per_bit, - const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, - int *cost_list, int *mvjcost, int *mvcost[2], int *distortion, - unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask, - int mask_stride, int invert_mask, int w, int h, - int use_accurate_subpel_search) { - SETUP_SUBPEL_SEARCH; - (void)use_accurate_subpel_search; - (void)cm; - (void)mi_row; - (void)mi_col; - - besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, - src_address, src_stride, y, y_stride, - second_pred, mask, mask_stride, invert_mask, w, - h, offset, mvjcost, mvcost, sse1, distortion); - if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && - cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && - cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) { - unsigned int minpt; - int ir, ic; - get_cost_surf_min(cost_list, &ir, &ic, 1); - if (ir != 0 || ic != 0) { - CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep); - } - } else { - FIRST_LEVEL_CHECKS; - if (halfiters > 1) { - SECOND_LEVEL_CHECKS; - } - } - - // Each subsequent iteration checks at least one point in common with - // the last iteration could be 2 ( if diag selected) 1/4 pel - - // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only - if (forced_stop != 2) { - tr = br; - tc = bc; - hstep >>= 1; - FIRST_LEVEL_CHECKS; - if (quarteriters > 1) { - SECOND_LEVEL_CHECKS; - } - } - - if (allow_hp && forced_stop == 0) { - tr = br; - tc = bc; - hstep >>= 1; - FIRST_LEVEL_CHECKS; - if (eighthiters > 1) { - 
SECOND_LEVEL_CHECKS; - } - } - // These lines insure static analysis doesn't warn that - // tr and tc aren't used after the above point. - (void)tr; - (void)tc; - - bestmv->row = br; - bestmv->col = bc; - - return besterr; -} - -int av1_find_best_sub_pixel_tree_pruned( - MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col, - const MV *ref_mv, int allow_hp, int error_per_bit, - const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, - int *cost_list, int *mvjcost, int *mvcost[2], int *distortion, - unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask, - int mask_stride, int invert_mask, int w, int h, - int use_accurate_subpel_search) { - SETUP_SUBPEL_SEARCH; - (void)use_accurate_subpel_search; - (void)cm; - (void)mi_row; - (void)mi_col; - - besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, - src_address, src_stride, y, y_stride, - second_pred, mask, mask_stride, invert_mask, w, - h, offset, mvjcost, mvcost, sse1, distortion); - if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && - cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && - cost_list[4] != INT_MAX) { - unsigned int left, right, up, down, diag; - whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) + - (cost_list[2] < cost_list[4] ? 
0 : 2); - switch (whichdir) { - case 0: - CHECK_BETTER(left, tr, tc - hstep); - CHECK_BETTER(down, tr + hstep, tc); - CHECK_BETTER(diag, tr + hstep, tc - hstep); - break; - case 1: - CHECK_BETTER(right, tr, tc + hstep); - CHECK_BETTER(down, tr + hstep, tc); - CHECK_BETTER(diag, tr + hstep, tc + hstep); - break; - case 2: - CHECK_BETTER(left, tr, tc - hstep); - CHECK_BETTER(up, tr - hstep, tc); - CHECK_BETTER(diag, tr - hstep, tc - hstep); - break; - case 3: - CHECK_BETTER(right, tr, tc + hstep); - CHECK_BETTER(up, tr - hstep, tc); - CHECK_BETTER(diag, tr - hstep, tc + hstep); - break; - } - } else { - FIRST_LEVEL_CHECKS; - if (halfiters > 1) { - SECOND_LEVEL_CHECKS; - } - } - - tr = br; - tc = bc; - - // Each subsequent iteration checks at least one point in common with - // the last iteration could be 2 ( if diag selected) 1/4 pel - - // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only - if (forced_stop != 2) { - hstep >>= 1; - FIRST_LEVEL_CHECKS; - if (quarteriters > 1) { - SECOND_LEVEL_CHECKS; - } - tr = br; - tc = bc; - } - - if (allow_hp && forced_stop == 0) { - hstep >>= 1; - FIRST_LEVEL_CHECKS; - if (eighthiters > 1) { - SECOND_LEVEL_CHECKS; - } - tr = br; - tc = bc; - } - // These lines insure static analysis doesn't warn that - // tr and tc aren't used after the above point. 
- (void)tr; - (void)tc; - - bestmv->row = br; - bestmv->col = bc; - - return besterr; -} - -/* clang-format off */ -static const MV search_step_table[12] = { - // left, right, up, down - { 0, -4 }, { 0, 4 }, { -4, 0 }, { 4, 0 }, - { 0, -2 }, { 0, 2 }, { -2, 0 }, { 2, 0 }, - { 0, -1 }, { 0, 1 }, { -1, 0 }, { 1, 0 } -}; -/* clang-format on */ - -static int upsampled_pref_error(MACROBLOCKD *xd, const AV1_COMMON *const cm, - int mi_row, int mi_col, const MV *const mv, - const aom_variance_fn_ptr_t *vfp, - const uint8_t *const src, const int src_stride, - const uint8_t *const y, int y_stride, - int subpel_x_q3, int subpel_y_q3, - const uint8_t *second_pred, const uint8_t *mask, - int mask_stride, int invert_mask, int w, int h, - unsigned int *sse, int subpel_search) { - unsigned int besterr; - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - DECLARE_ALIGNED(16, uint16_t, pred16[MAX_SB_SQUARE]); - uint8_t *pred8 = CONVERT_TO_BYTEPTR(pred16); - if (second_pred != NULL) { - if (mask) { - aom_highbd_comp_mask_upsampled_pred( - xd, cm, mi_row, mi_col, mv, pred8, second_pred, w, h, subpel_x_q3, - subpel_y_q3, y, y_stride, mask, mask_stride, invert_mask, xd->bd, - subpel_search); - } else { - if (xd->jcp_param.use_jnt_comp_avg) - aom_highbd_jnt_comp_avg_upsampled_pred( - xd, cm, mi_row, mi_col, mv, pred8, second_pred, w, h, subpel_x_q3, - subpel_y_q3, y, y_stride, xd->bd, &xd->jcp_param, subpel_search); - else - aom_highbd_comp_avg_upsampled_pred( - xd, cm, mi_row, mi_col, mv, pred8, second_pred, w, h, subpel_x_q3, - subpel_y_q3, y, y_stride, xd->bd, subpel_search); - } - } else { - aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred8, w, h, - subpel_x_q3, subpel_y_q3, y, y_stride, xd->bd, - subpel_search); - } - besterr = vfp->vf(pred8, w, src, src_stride, sse); - } else { - DECLARE_ALIGNED(16, uint8_t, pred[MAX_SB_SQUARE]); - if (second_pred != NULL) { - if (mask) { - aom_comp_mask_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred, - second_pred, w, h, subpel_x_q3, 
- subpel_y_q3, y, y_stride, mask, - mask_stride, invert_mask, subpel_search); - } else { - if (xd->jcp_param.use_jnt_comp_avg) - aom_jnt_comp_avg_upsampled_pred( - xd, cm, mi_row, mi_col, mv, pred, second_pred, w, h, subpel_x_q3, - subpel_y_q3, y, y_stride, &xd->jcp_param, subpel_search); - else - aom_comp_avg_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred, - second_pred, w, h, subpel_x_q3, - subpel_y_q3, y, y_stride, subpel_search); - } - } else { - aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred, w, h, subpel_x_q3, - subpel_y_q3, y, y_stride, subpel_search); - } - - besterr = vfp->vf(pred, w, src, src_stride, sse); - } - return besterr; -} - -static unsigned int upsampled_setup_center_error( - MACROBLOCKD *xd, const AV1_COMMON *const cm, int mi_row, int mi_col, - const MV *bestmv, const MV *ref_mv, int error_per_bit, - const aom_variance_fn_ptr_t *vfp, const uint8_t *const src, - const int src_stride, const uint8_t *const y, int y_stride, - const uint8_t *second_pred, const uint8_t *mask, int mask_stride, - int invert_mask, int w, int h, int offset, int *mvjcost, int *mvcost[2], - unsigned int *sse1, int *distortion, int subpel_search) { - unsigned int besterr = - upsampled_pref_error(xd, cm, mi_row, mi_col, bestmv, vfp, src, src_stride, - y + offset, y_stride, 0, 0, second_pred, mask, - mask_stride, invert_mask, w, h, sse1, subpel_search); - *distortion = besterr; - besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); - return besterr; -} - -// when use_accurate_subpel_search == 0 -static INLINE unsigned int estimate_upsampled_pref_error( - MACROBLOCKD *xd, const aom_variance_fn_ptr_t *vfp, const uint8_t *const src, - const int src_stride, const uint8_t *const pre, int y_stride, - int subpel_x_q3, int subpel_y_q3, const uint8_t *second_pred, - const uint8_t *mask, int mask_stride, int invert_mask, unsigned int *sse) { - if (second_pred == NULL) { - return vfp->svf(pre, y_stride, subpel_x_q3, subpel_y_q3, src, src_stride, - sse); - } else 
if (mask) { - return vfp->msvf(pre, y_stride, subpel_x_q3, subpel_y_q3, src, src_stride, - second_pred, mask, mask_stride, invert_mask, sse); - } else { - if (xd->jcp_param.use_jnt_comp_avg) - return vfp->jsvaf(pre, y_stride, subpel_x_q3, subpel_y_q3, src, - src_stride, sse, second_pred, &xd->jcp_param); - else - return vfp->svaf(pre, y_stride, subpel_x_q3, subpel_y_q3, src, src_stride, - sse, second_pred); - } -} - -int av1_find_best_sub_pixel_tree( - MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col, - const MV *ref_mv, int allow_hp, int error_per_bit, - const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, - int *cost_list, int *mvjcost, int *mvcost[2], int *distortion, - unsigned int *sse1, const uint8_t *second_pred, const uint8_t *mask, - int mask_stride, int invert_mask, int w, int h, - int use_accurate_subpel_search) { - const uint8_t *const src_address = x->plane[0].src.buf; - const int src_stride = x->plane[0].src.stride; - MACROBLOCKD *xd = &x->e_mbd; - unsigned int besterr = INT_MAX; - unsigned int sse; - unsigned int thismse; - const int y_stride = xd->plane[0].pre[0].stride; - MV *bestmv = &x->best_mv.as_mv; - const int offset = bestmv->row * y_stride + bestmv->col; - const uint8_t *const y = xd->plane[0].pre[0].buf; - - int br = bestmv->row * 8; - int bc = bestmv->col * 8; - int hstep = 4; - int iter, round = 3 - forced_stop; - int tr = br; - int tc = bc; - const MV *search_step = search_step_table; - int idx, best_idx = -1; - unsigned int cost_array[5]; - int kr, kc; - int minc, maxc, minr, maxr; - - set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr, ref_mv); - - if (!allow_hp) - if (round == 3) round = 2; - - bestmv->row *= 8; - bestmv->col *= 8; - - if (use_accurate_subpel_search) - besterr = upsampled_setup_center_error( - xd, cm, mi_row, mi_col, bestmv, ref_mv, error_per_bit, vfp, src_address, - src_stride, y, y_stride, second_pred, mask, mask_stride, invert_mask, w, - h, offset, mvjcost, 
mvcost, sse1, distortion, - use_accurate_subpel_search); - else - besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, - src_address, src_stride, y, y_stride, - second_pred, mask, mask_stride, invert_mask, w, - h, offset, mvjcost, mvcost, sse1, distortion); - - (void)cost_list; // to silence compiler warning - - for (iter = 0; iter < round; ++iter) { - // Check vertical and horizontal sub-pixel positions. - for (idx = 0; idx < 4; ++idx) { - tr = br + search_step[idx].row; - tc = bc + search_step[idx].col; - if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { - MV this_mv = { tr, tc }; - - if (use_accurate_subpel_search) { - thismse = upsampled_pref_error( - xd, cm, mi_row, mi_col, &this_mv, vfp, src_address, src_stride, - pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), second_pred, - mask, mask_stride, invert_mask, w, h, &sse, - use_accurate_subpel_search); - } else { - thismse = estimate_upsampled_pref_error( - xd, vfp, src_address, src_stride, pre(y, y_stride, tr, tc), - y_stride, sp(tc), sp(tr), second_pred, mask, mask_stride, - invert_mask, &sse); - } - - cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, - mvcost, error_per_bit); - - if (cost_array[idx] < besterr) { - best_idx = idx; - besterr = cost_array[idx]; - *distortion = thismse; - *sse1 = sse; - } - } else { - cost_array[idx] = INT_MAX; - } - } - - // Check diagonal sub-pixel position - kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep); - kr = (cost_array[2] <= cost_array[3] ? 
-hstep : hstep); - - tc = bc + kc; - tr = br + kr; - if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { - MV this_mv = { tr, tc }; - - if (use_accurate_subpel_search) { - thismse = upsampled_pref_error( - xd, cm, mi_row, mi_col, &this_mv, vfp, src_address, src_stride, - pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), second_pred, - mask, mask_stride, invert_mask, w, h, &sse, - use_accurate_subpel_search); - } else { - thismse = estimate_upsampled_pref_error( - xd, vfp, src_address, src_stride, pre(y, y_stride, tr, tc), - y_stride, sp(tc), sp(tr), second_pred, mask, mask_stride, - invert_mask, &sse); - } - - cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (cost_array[4] < besterr) { - best_idx = 4; - besterr = cost_array[4]; - *distortion = thismse; - *sse1 = sse; - } - } else { - cost_array[idx] = INT_MAX; - } - - if (best_idx < 4 && best_idx >= 0) { - br += search_step[best_idx].row; - bc += search_step[best_idx].col; - } else if (best_idx == 4) { - br = tr; - bc = tc; - } - - if (iters_per_step > 1 && best_idx != -1) { - if (use_accurate_subpel_search) { - SECOND_LEVEL_CHECKS_BEST(1); - } else { - SECOND_LEVEL_CHECKS_BEST(0); - } - } - - search_step += 4; - hstep >>= 1; - best_idx = -1; - } - - // These lines insure static analysis doesn't warn that - // tr and tc aren't used after the above point. 
- (void)tr; - (void)tc; - - bestmv->row = br; - bestmv->col = bc; - - return besterr; -} - -#undef PRE -#undef CHECK_BETTER - -unsigned int av1_compute_motion_cost(const AV1_COMP *cpi, MACROBLOCK *const x, - BLOCK_SIZE bsize, int mi_row, int mi_col, - const MV *this_mv) { - const AV1_COMMON *const cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - const uint8_t *const src = x->plane[0].src.buf; - const int src_stride = x->plane[0].src.stride; - uint8_t *const dst = xd->plane[0].dst.buf; - const int dst_stride = xd->plane[0].dst.stride; - const aom_variance_fn_ptr_t *vfp = &cpi->fn_ptr[bsize]; - const int_mv ref_mv = av1_get_ref_mv(x, 0); - unsigned int mse; - unsigned int sse; - - av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, NULL, bsize); - mse = vfp->vf(dst, dst_stride, src, src_stride, &sse); - mse += mv_err_cost(this_mv, &ref_mv.as_mv, x->nmvjointcost, x->mvcost, - x->errorperbit); - return mse; -} - -// Refine MV in a small range -unsigned int av1_refine_warped_mv(const AV1_COMP *cpi, MACROBLOCK *const x, - BLOCK_SIZE bsize, int mi_row, int mi_col, - int *pts0, int *pts_inref0, - int total_samples) { - const AV1_COMMON *const cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = xd->mi[0]; - const MV neighbors[8] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 }, - { 0, -2 }, { 2, 0 }, { 0, 2 }, { -2, 0 } }; - const int_mv ref_mv = av1_get_ref_mv(x, 0); - int16_t br = mbmi->mv[0].as_mv.row; - int16_t bc = mbmi->mv[0].as_mv.col; - int16_t *tr = &mbmi->mv[0].as_mv.row; - int16_t *tc = &mbmi->mv[0].as_mv.col; - WarpedMotionParams best_wm_params = mbmi->wm_params; - int best_num_proj_ref = mbmi->num_proj_ref; - unsigned int bestmse; - int minc, maxc, minr, maxr; - const int start = cm->allow_high_precision_mv ? 
0 : 4; - int ite; - - set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr, - &ref_mv.as_mv); - - // Calculate the center position's error - assert(bc >= minc && bc <= maxc && br >= minr && br <= maxr); - bestmse = av1_compute_motion_cost(cpi, x, bsize, mi_row, mi_col, - &mbmi->mv[0].as_mv); - - // MV search - for (ite = 0; ite < 2; ++ite) { - int best_idx = -1; - int idx; - - for (idx = start; idx < start + 4; ++idx) { - unsigned int thismse; - - *tr = br + neighbors[idx].row; - *tc = bc + neighbors[idx].col; - - if (*tc >= minc && *tc <= maxc && *tr >= minr && *tr <= maxr) { - MV this_mv = { *tr, *tc }; - int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE]; - - memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0)); - memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0)); - if (total_samples > 1) - mbmi->num_proj_ref = - selectSamples(&this_mv, pts, pts_inref, total_samples, bsize); - - if (!find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize, *tr, - *tc, &mbmi->wm_params, mi_row, mi_col)) { - thismse = - av1_compute_motion_cost(cpi, x, bsize, mi_row, mi_col, &this_mv); - - if (thismse < bestmse) { - best_idx = idx; - best_wm_params = mbmi->wm_params; - best_num_proj_ref = mbmi->num_proj_ref; - bestmse = thismse; - } - } - } - } - - if (best_idx == -1) break; - - if (best_idx >= 0) { - br += neighbors[best_idx].row; - bc += neighbors[best_idx].col; - } - } - - *tr = br; - *tc = bc; - mbmi->wm_params = best_wm_params; - mbmi->num_proj_ref = best_num_proj_ref; - return bestmse; -} - -static INLINE int check_bounds(const MvLimits *mv_limits, int row, int col, - int range) { - return ((row - range) >= mv_limits->row_min) & - ((row + range) <= mv_limits->row_max) & - ((col - range) >= mv_limits->col_min) & - ((col + range) <= mv_limits->col_max); -} - -static INLINE int is_mv_in(const MvLimits *mv_limits, const MV *mv) { - return (mv->col >= mv_limits->col_min) && (mv->col <= mv_limits->col_max) && - (mv->row >= 
mv_limits->row_min) && (mv->row <= mv_limits->row_max); -} - -#define CHECK_BETTER \ - { \ - if (thissad < bestsad) { \ - if (use_mvcost) \ - thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); \ - if (thissad < bestsad) { \ - bestsad = thissad; \ - best_site = i; \ - } \ - } \ - } - -#define MAX_PATTERN_SCALES 11 -#define MAX_PATTERN_CANDIDATES 8 // max number of canddiates per scale -#define PATTERN_CANDIDATES_REF 3 // number of refinement candidates - -// Calculate and return a sad+mvcost list around an integer best pel. -static INLINE void calc_int_cost_list(const MACROBLOCK *x, - const MV *const ref_mv, int sadpb, - const aom_variance_fn_ptr_t *fn_ptr, - const MV *best_mv, int *cost_list) { - static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } }; - const struct buf_2d *const what = &x->plane[0].src; - const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0]; - const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 }; - const int br = best_mv->row; - const int bc = best_mv->col; - int i; - unsigned int sse; - const MV this_mv = { br, bc }; - - cost_list[0] = - fn_ptr->vf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), - in_what->stride, &sse) + - mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb); - if (check_bounds(&x->mv_limits, br, bc, 1)) { - for (i = 0; i < 4; i++) { - const MV neighbor_mv = { br + neighbors[i].row, bc + neighbors[i].col }; - cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride, - get_buf_from_mv(in_what, &neighbor_mv), - in_what->stride, &sse) + - mv_err_cost(&neighbor_mv, &fcenter_mv, x->nmvjointcost, - x->mvcost, x->errorperbit); - } - } else { - for (i = 0; i < 4; i++) { - const MV neighbor_mv = { br + neighbors[i].row, bc + neighbors[i].col }; - if (!is_mv_in(&x->mv_limits, &neighbor_mv)) - cost_list[i + 1] = INT_MAX; - else - cost_list[i + 1] = - fn_ptr->vf(what->buf, what->stride, - get_buf_from_mv(in_what, &neighbor_mv), in_what->stride, - &sse) + - mv_err_cost(&neighbor_mv, 
&fcenter_mv, x->nmvjointcost, x->mvcost, - x->errorperbit); - } - } -} - -static INLINE void calc_int_sad_list(const MACROBLOCK *x, - const MV *const ref_mv, int sadpb, - const aom_variance_fn_ptr_t *fn_ptr, - const MV *best_mv, int *cost_list, - const int use_mvcost, const int bestsad) { - static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } }; - const struct buf_2d *const what = &x->plane[0].src; - const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0]; - const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 }; - int i; - const int br = best_mv->row; - const int bc = best_mv->col; - - if (cost_list[0] == INT_MAX) { - cost_list[0] = bestsad; - if (check_bounds(&x->mv_limits, br, bc, 1)) { - for (i = 0; i < 4; i++) { - const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col }; - cost_list[i + 1] = - fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &this_mv), in_what->stride); - } - } else { - for (i = 0; i < 4; i++) { - const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col }; - if (!is_mv_in(&x->mv_limits, &this_mv)) - cost_list[i + 1] = INT_MAX; - else - cost_list[i + 1] = - fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &this_mv), in_what->stride); - } - } - } else { - if (use_mvcost) { - for (i = 0; i < 4; i++) { - const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col }; - if (cost_list[i + 1] != INT_MAX) { - cost_list[i + 1] += mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb); - } - } - } - } -} - -// Generic pattern search function that searches over multiple scales. 
-// Each scale can have a different number of candidates and shape of -// candidates as indicated in the num_candidates and candidates arrays -// passed into this function -// -static int pattern_search( - MACROBLOCK *x, MV *start_mv, int search_param, int sad_per_bit, - int do_init_search, int *cost_list, const aom_variance_fn_ptr_t *vfp, - int use_mvcost, const MV *center_mv, - const int num_candidates[MAX_PATTERN_SCALES], - const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) { - const MACROBLOCKD *const xd = &x->e_mbd; - static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = { - 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, - }; - int i, s, t; - const struct buf_2d *const what = &x->plane[0].src; - const struct buf_2d *const in_what = &xd->plane[0].pre[0]; - const int last_is_4 = num_candidates[0] == 4; - int br, bc; - int bestsad = INT_MAX; - int thissad; - int k = -1; - const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; - assert(search_param < MAX_MVSEARCH_STEPS); - int best_init_s = search_param_to_steps[search_param]; - // adjust ref_mv to make sure it is within MV range - clamp_mv(start_mv, x->mv_limits.col_min, x->mv_limits.col_max, - x->mv_limits.row_min, x->mv_limits.row_max); - br = start_mv->row; - bc = start_mv->col; - if (cost_list != NULL) { - cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = - INT_MAX; - } - - // Work out the start point for the search - bestsad = vfp->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, start_mv), in_what->stride) + - mvsad_err_cost(x, start_mv, &fcenter_mv, sad_per_bit); - - // Search all possible scales upto the search param around the center point - // pick the scale of the point that is best as the starting scale of - // further steps around it. 
- if (do_init_search) { - s = best_init_s; - best_init_s = -1; - for (t = 0; t <= s; ++t) { - int best_site = -1; - if (check_bounds(&x->mv_limits, br, bc, 1 << t)) { - for (i = 0; i < num_candidates[t]; i++) { - const MV this_mv = { br + candidates[t][i].row, - bc + candidates[t][i].col }; - thissad = - vfp->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &this_mv), in_what->stride); - CHECK_BETTER - } - } else { - for (i = 0; i < num_candidates[t]; i++) { - const MV this_mv = { br + candidates[t][i].row, - bc + candidates[t][i].col }; - if (!is_mv_in(&x->mv_limits, &this_mv)) continue; - thissad = - vfp->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &this_mv), in_what->stride); - CHECK_BETTER - } - } - if (best_site == -1) { - continue; - } else { - best_init_s = t; - k = best_site; - } - } - if (best_init_s != -1) { - br += candidates[best_init_s][k].row; - bc += candidates[best_init_s][k].col; - } - } - - // If the center point is still the best, just skip this and move to - // the refinement step. 
- if (best_init_s != -1) { - const int last_s = (last_is_4 && cost_list != NULL); - int best_site = -1; - s = best_init_s; - - for (; s >= last_s; s--) { - // No need to search all points the 1st time if initial search was used - if (!do_init_search || s != best_init_s) { - if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { - for (i = 0; i < num_candidates[s]; i++) { - const MV this_mv = { br + candidates[s][i].row, - bc + candidates[s][i].col }; - thissad = - vfp->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &this_mv), in_what->stride); - CHECK_BETTER - } - } else { - for (i = 0; i < num_candidates[s]; i++) { - const MV this_mv = { br + candidates[s][i].row, - bc + candidates[s][i].col }; - if (!is_mv_in(&x->mv_limits, &this_mv)) continue; - thissad = - vfp->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &this_mv), in_what->stride); - CHECK_BETTER - } - } - - if (best_site == -1) { - continue; - } else { - br += candidates[s][best_site].row; - bc += candidates[s][best_site].col; - k = best_site; - } - } - - do { - int next_chkpts_indices[PATTERN_CANDIDATES_REF]; - best_site = -1; - next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1; - next_chkpts_indices[1] = k; - next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 
0 : k + 1; - - if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { - for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { - const MV this_mv = { - br + candidates[s][next_chkpts_indices[i]].row, - bc + candidates[s][next_chkpts_indices[i]].col - }; - thissad = - vfp->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &this_mv), in_what->stride); - CHECK_BETTER - } - } else { - for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { - const MV this_mv = { - br + candidates[s][next_chkpts_indices[i]].row, - bc + candidates[s][next_chkpts_indices[i]].col - }; - if (!is_mv_in(&x->mv_limits, &this_mv)) continue; - thissad = - vfp->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &this_mv), in_what->stride); - CHECK_BETTER - } - } - - if (best_site != -1) { - k = next_chkpts_indices[best_site]; - br += candidates[s][k].row; - bc += candidates[s][k].col; - } - } while (best_site != -1); - } - - // Note: If we enter the if below, then cost_list must be non-NULL. - if (s == 0) { - cost_list[0] = bestsad; - if (!do_init_search || s != best_init_s) { - if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { - for (i = 0; i < num_candidates[s]; i++) { - const MV this_mv = { br + candidates[s][i].row, - bc + candidates[s][i].col }; - cost_list[i + 1] = thissad = - vfp->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &this_mv), in_what->stride); - CHECK_BETTER - } - } else { - for (i = 0; i < num_candidates[s]; i++) { - const MV this_mv = { br + candidates[s][i].row, - bc + candidates[s][i].col }; - if (!is_mv_in(&x->mv_limits, &this_mv)) continue; - cost_list[i + 1] = thissad = - vfp->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &this_mv), in_what->stride); - CHECK_BETTER - } - } - - if (best_site != -1) { - br += candidates[s][best_site].row; - bc += candidates[s][best_site].col; - k = best_site; - } - } - while (best_site != -1) { - int next_chkpts_indices[PATTERN_CANDIDATES_REF]; - best_site = -1; - next_chkpts_indices[0] = (k == 0) ? 
num_candidates[s] - 1 : k - 1; - next_chkpts_indices[1] = k; - next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1; - cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX; - cost_list[((k + 2) % 4) + 1] = cost_list[0]; - cost_list[0] = bestsad; - - if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { - for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { - const MV this_mv = { - br + candidates[s][next_chkpts_indices[i]].row, - bc + candidates[s][next_chkpts_indices[i]].col - }; - cost_list[next_chkpts_indices[i] + 1] = thissad = - vfp->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &this_mv), in_what->stride); - CHECK_BETTER - } - } else { - for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { - const MV this_mv = { - br + candidates[s][next_chkpts_indices[i]].row, - bc + candidates[s][next_chkpts_indices[i]].col - }; - if (!is_mv_in(&x->mv_limits, &this_mv)) { - cost_list[next_chkpts_indices[i] + 1] = INT_MAX; - continue; - } - cost_list[next_chkpts_indices[i] + 1] = thissad = - vfp->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &this_mv), in_what->stride); - CHECK_BETTER - } - } - - if (best_site != -1) { - k = next_chkpts_indices[best_site]; - br += candidates[s][k].row; - bc += candidates[s][k].col; - } - } - } - } - - // Returns the one-away integer pel cost/sad around the best as follows: - // cost_list[0]: cost/sad at the best integer pel - // cost_list[1]: cost/sad at delta {0, -1} (left) from the best integer pel - // cost_list[2]: cost/sad at delta { 1, 0} (bottom) from the best integer pel - // cost_list[3]: cost/sad at delta { 0, 1} (right) from the best integer pel - // cost_list[4]: cost/sad at delta {-1, 0} (top) from the best integer pel - if (cost_list) { - const MV best_int_mv = { br, bc }; - if (last_is_4) { - calc_int_sad_list(x, center_mv, sad_per_bit, vfp, &best_int_mv, cost_list, - use_mvcost, bestsad); - } else { - calc_int_cost_list(x, center_mv, sad_per_bit, vfp, &best_int_mv, - cost_list); - } - } - 
x->best_mv.as_mv.row = br; - x->best_mv.as_mv.col = bc; - return bestsad; -} - -int av1_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv, - const MV *center_mv, const aom_variance_fn_ptr_t *vfp, - int use_mvcost) { - const MACROBLOCKD *const xd = &x->e_mbd; - const struct buf_2d *const what = &x->plane[0].src; - const struct buf_2d *const in_what = &xd->plane[0].pre[0]; - const MV mv = { best_mv->row * 8, best_mv->col * 8 }; - unsigned int unused; - - return vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv), - in_what->stride, &unused) + - (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost, - x->errorperbit) - : 0); -} - -int av1_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv, - const MV *center_mv, const uint8_t *second_pred, - const aom_variance_fn_ptr_t *vfp, int use_mvcost) { - const MACROBLOCKD *const xd = &x->e_mbd; - const struct buf_2d *const what = &x->plane[0].src; - const struct buf_2d *const in_what = &xd->plane[0].pre[0]; - const MV mv = { best_mv->row * 8, best_mv->col * 8 }; - unsigned int unused; - - if (xd->jcp_param.use_jnt_comp_avg) - return vfp->jsvaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0, - what->buf, what->stride, &unused, second_pred, - &xd->jcp_param) + - (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost, - x->errorperbit) - : 0); - else - return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0, - what->buf, what->stride, &unused, second_pred) + - (use_mvcost ? 
mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost, - x->errorperbit) - : 0); -} - -int av1_get_mvpred_mask_var(const MACROBLOCK *x, const MV *best_mv, - const MV *center_mv, const uint8_t *second_pred, - const uint8_t *mask, int mask_stride, - int invert_mask, const aom_variance_fn_ptr_t *vfp, - int use_mvcost) { - const MACROBLOCKD *const xd = &x->e_mbd; - const struct buf_2d *const what = &x->plane[0].src; - const struct buf_2d *const in_what = &xd->plane[0].pre[0]; - const MV mv = { best_mv->row * 8, best_mv->col * 8 }; - unsigned int unused; - - return vfp->msvf(what->buf, what->stride, 0, 0, - get_buf_from_mv(in_what, best_mv), in_what->stride, - second_pred, mask, mask_stride, invert_mask, &unused) + - (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost, - x->errorperbit) - : 0); -} - -int av1_hex_search(MACROBLOCK *x, MV *start_mv, int search_param, - int sad_per_bit, int do_init_search, int *cost_list, - const aom_variance_fn_ptr_t *vfp, int use_mvcost, - const MV *center_mv) { - // First scale has 8-closest points, the rest have 6 points in hex shape - // at increasing scales - static const int hex_num_candidates[MAX_PATTERN_SCALES] = { 8, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6 }; - // Note that the largest candidate step at each scale is 2^scale - /* clang-format off */ - static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = { - { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 }, { -1, 1 }, - { -1, 0 } }, - { { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 } }, - { { -2, -4 }, { 2, -4 }, { 4, 0 }, { 2, 4 }, { -2, 4 }, { -4, 0 } }, - { { -4, -8 }, { 4, -8 }, { 8, 0 }, { 4, 8 }, { -4, 8 }, { -8, 0 } }, - { { -8, -16 }, { 8, -16 }, { 16, 0 }, { 8, 16 }, { -8, 16 }, { -16, 0 } }, - { { -16, -32 }, { 16, -32 }, { 32, 0 }, { 16, 32 }, { -16, 32 }, - { -32, 0 } }, - { { -32, -64 }, { 32, -64 }, { 64, 0 }, { 32, 64 }, { -32, 64 }, - { -64, 0 } }, - { { -64, -128 }, { 64, -128 }, { 128, 0 }, { 64, 
128 }, { -64, 128 }, - { -128, 0 } }, - { { -128, -256 }, { 128, -256 }, { 256, 0 }, { 128, 256 }, { -128, 256 }, - { -256, 0 } }, - { { -256, -512 }, { 256, -512 }, { 512, 0 }, { 256, 512 }, { -256, 512 }, - { -512, 0 } }, - { { -512, -1024 }, { 512, -1024 }, { 1024, 0 }, { 512, 1024 }, - { -512, 1024 }, { -1024, 0 } }, - }; - /* clang-format on */ - return pattern_search(x, start_mv, search_param, sad_per_bit, do_init_search, - cost_list, vfp, use_mvcost, center_mv, - hex_num_candidates, hex_candidates); -} - -static int bigdia_search(MACROBLOCK *x, MV *start_mv, int search_param, - int sad_per_bit, int do_init_search, int *cost_list, - const aom_variance_fn_ptr_t *vfp, int use_mvcost, - const MV *center_mv) { - // First scale has 4-closest points, the rest have 8 points in diamond - // shape at increasing scales - static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = { - 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - }; - // Note that the largest candidate step at each scale is 2^scale - /* clang-format off */ - static const MV - bigdia_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = { - { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } }, - { { -1, -1 }, { 0, -2 }, { 1, -1 }, { 2, 0 }, { 1, 1 }, { 0, 2 }, - { -1, 1 }, { -2, 0 } }, - { { -2, -2 }, { 0, -4 }, { 2, -2 }, { 4, 0 }, { 2, 2 }, { 0, 4 }, - { -2, 2 }, { -4, 0 } }, - { { -4, -4 }, { 0, -8 }, { 4, -4 }, { 8, 0 }, { 4, 4 }, { 0, 8 }, - { -4, 4 }, { -8, 0 } }, - { { -8, -8 }, { 0, -16 }, { 8, -8 }, { 16, 0 }, { 8, 8 }, { 0, 16 }, - { -8, 8 }, { -16, 0 } }, - { { -16, -16 }, { 0, -32 }, { 16, -16 }, { 32, 0 }, { 16, 16 }, - { 0, 32 }, { -16, 16 }, { -32, 0 } }, - { { -32, -32 }, { 0, -64 }, { 32, -32 }, { 64, 0 }, { 32, 32 }, - { 0, 64 }, { -32, 32 }, { -64, 0 } }, - { { -64, -64 }, { 0, -128 }, { 64, -64 }, { 128, 0 }, { 64, 64 }, - { 0, 128 }, { -64, 64 }, { -128, 0 } }, - { { -128, -128 }, { 0, -256 }, { 128, -128 }, { 256, 0 }, { 128, 128 }, - { 0, 256 }, { -128, 128 }, { -256, 0 } }, - { { -256, -256 }, { 
0, -512 }, { 256, -256 }, { 512, 0 }, { 256, 256 }, - { 0, 512 }, { -256, 256 }, { -512, 0 } }, - { { -512, -512 }, { 0, -1024 }, { 512, -512 }, { 1024, 0 }, - { 512, 512 }, { 0, 1024 }, { -512, 512 }, { -1024, 0 } }, - }; - /* clang-format on */ - return pattern_search(x, start_mv, search_param, sad_per_bit, do_init_search, - cost_list, vfp, use_mvcost, center_mv, - bigdia_num_candidates, bigdia_candidates); -} - -static int square_search(MACROBLOCK *x, MV *start_mv, int search_param, - int sad_per_bit, int do_init_search, int *cost_list, - const aom_variance_fn_ptr_t *vfp, int use_mvcost, - const MV *center_mv) { - // All scales have 8 closest points in square shape - static const int square_num_candidates[MAX_PATTERN_SCALES] = { - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - }; - // Note that the largest candidate step at each scale is 2^scale - /* clang-format off */ - static const MV - square_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = { - { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 }, - { -1, 1 }, { -1, 0 } }, - { { -2, -2 }, { 0, -2 }, { 2, -2 }, { 2, 0 }, { 2, 2 }, { 0, 2 }, - { -2, 2 }, { -2, 0 } }, - { { -4, -4 }, { 0, -4 }, { 4, -4 }, { 4, 0 }, { 4, 4 }, { 0, 4 }, - { -4, 4 }, { -4, 0 } }, - { { -8, -8 }, { 0, -8 }, { 8, -8 }, { 8, 0 }, { 8, 8 }, { 0, 8 }, - { -8, 8 }, { -8, 0 } }, - { { -16, -16 }, { 0, -16 }, { 16, -16 }, { 16, 0 }, { 16, 16 }, - { 0, 16 }, { -16, 16 }, { -16, 0 } }, - { { -32, -32 }, { 0, -32 }, { 32, -32 }, { 32, 0 }, { 32, 32 }, - { 0, 32 }, { -32, 32 }, { -32, 0 } }, - { { -64, -64 }, { 0, -64 }, { 64, -64 }, { 64, 0 }, { 64, 64 }, - { 0, 64 }, { -64, 64 }, { -64, 0 } }, - { { -128, -128 }, { 0, -128 }, { 128, -128 }, { 128, 0 }, { 128, 128 }, - { 0, 128 }, { -128, 128 }, { -128, 0 } }, - { { -256, -256 }, { 0, -256 }, { 256, -256 }, { 256, 0 }, { 256, 256 }, - { 0, 256 }, { -256, 256 }, { -256, 0 } }, - { { -512, -512 }, { 0, -512 }, { 512, -512 }, { 512, 0 }, { 512, 512 }, - { 0, 512 }, { -512, 512 }, { 
-512, 0 } }, - { { -1024, -1024 }, { 0, -1024 }, { 1024, -1024 }, { 1024, 0 }, - { 1024, 1024 }, { 0, 1024 }, { -1024, 1024 }, { -1024, 0 } }, - }; - /* clang-format on */ - return pattern_search(x, start_mv, search_param, sad_per_bit, do_init_search, - cost_list, vfp, use_mvcost, center_mv, - square_num_candidates, square_candidates); -} - -static int fast_hex_search(MACROBLOCK *x, MV *ref_mv, int search_param, - int sad_per_bit, - int do_init_search, // must be zero for fast_hex - int *cost_list, const aom_variance_fn_ptr_t *vfp, - int use_mvcost, const MV *center_mv) { - return av1_hex_search(x, ref_mv, AOMMAX(MAX_MVSEARCH_STEPS - 2, search_param), - sad_per_bit, do_init_search, cost_list, vfp, use_mvcost, - center_mv); -} - -static int fast_dia_search(MACROBLOCK *x, MV *ref_mv, int search_param, - int sad_per_bit, int do_init_search, int *cost_list, - const aom_variance_fn_ptr_t *vfp, int use_mvcost, - const MV *center_mv) { - return bigdia_search(x, ref_mv, AOMMAX(MAX_MVSEARCH_STEPS - 2, search_param), - sad_per_bit, do_init_search, cost_list, vfp, use_mvcost, - center_mv); -} - -#undef CHECK_BETTER - -// Exhuastive motion search around a given centre position with a given -// step size. -static int exhuastive_mesh_search(MACROBLOCK *x, MV *ref_mv, MV *best_mv, - int range, int step, int sad_per_bit, - const aom_variance_fn_ptr_t *fn_ptr, - const MV *center_mv) { - const MACROBLOCKD *const xd = &x->e_mbd; - const struct buf_2d *const what = &x->plane[0].src; - const struct buf_2d *const in_what = &xd->plane[0].pre[0]; - MV fcenter_mv = { center_mv->row, center_mv->col }; - unsigned int best_sad = INT_MAX; - int r, c, i; - int start_col, end_col, start_row, end_row; - int col_step = (step > 1) ? 
step : 4; - - assert(step >= 1); - - clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max, - x->mv_limits.row_min, x->mv_limits.row_max); - *best_mv = fcenter_mv; - best_sad = - fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) + - mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit); - start_row = AOMMAX(-range, x->mv_limits.row_min - fcenter_mv.row); - start_col = AOMMAX(-range, x->mv_limits.col_min - fcenter_mv.col); - end_row = AOMMIN(range, x->mv_limits.row_max - fcenter_mv.row); - end_col = AOMMIN(range, x->mv_limits.col_max - fcenter_mv.col); - - for (r = start_row; r <= end_row; r += step) { - for (c = start_col; c <= end_col; c += col_step) { - // Step > 1 means we are not checking every location in this pass. - if (step > 1) { - const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c }; - unsigned int sad = - fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), - in_what->stride); - if (sad < best_sad) { - sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit); - if (sad < best_sad) { - best_sad = sad; - x->second_best_mv.as_mv = *best_mv; - *best_mv = mv; - } - } - } else { - // 4 sads in a single call if we are checking every location - if (c + 3 <= end_col) { - unsigned int sads[4]; - const uint8_t *addrs[4]; - for (i = 0; i < 4; ++i) { - const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i }; - addrs[i] = get_buf_from_mv(in_what, &mv); - } - fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads); - - for (i = 0; i < 4; ++i) { - if (sads[i] < best_sad) { - const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i }; - const unsigned int sad = - sads[i] + mvsad_err_cost(x, &mv, ref_mv, sad_per_bit); - if (sad < best_sad) { - best_sad = sad; - x->second_best_mv.as_mv = *best_mv; - *best_mv = mv; - } - } - } - } else { - for (i = 0; i < end_col - c; ++i) { - const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i }; - unsigned int sad = - fn_ptr->sdf(what->buf, 
what->stride, - get_buf_from_mv(in_what, &mv), in_what->stride); - if (sad < best_sad) { - sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit); - if (sad < best_sad) { - best_sad = sad; - x->second_best_mv.as_mv = *best_mv; - *best_mv = mv; - } - } - } - } - } - } - } - - return best_sad; -} - -int av1_diamond_search_sad_c(MACROBLOCK *x, const search_site_config *cfg, - MV *ref_mv, MV *best_mv, int search_param, - int sad_per_bit, int *num00, - const aom_variance_fn_ptr_t *fn_ptr, - const MV *center_mv) { - int i, j, step; - - const MACROBLOCKD *const xd = &x->e_mbd; - uint8_t *what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - const uint8_t *in_what; - const int in_what_stride = xd->plane[0].pre[0].stride; - const uint8_t *best_address; - - unsigned int bestsad = INT_MAX; - int best_site = 0; - int last_site = 0; - - int ref_row; - int ref_col; - - // search_param determines the length of the initial step and hence the number - // of iterations. - // 0 = initial step (MAX_FIRST_STEP) pel - // 1 = (MAX_FIRST_STEP/2) pel, - // 2 = (MAX_FIRST_STEP/4) pel... 
- const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step]; - const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param; - - const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; - clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max, - x->mv_limits.row_min, x->mv_limits.row_max); - ref_row = ref_mv->row; - ref_col = ref_mv->col; - *num00 = 0; - best_mv->row = ref_row; - best_mv->col = ref_col; - - // Work out the start point for the search - in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col; - best_address = in_what; - - // Check the starting position - bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) + - mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); - - i = 1; - - for (step = 0; step < tot_steps; step++) { - int all_in = 1, t; - - // All_in is true if every one of the points we are checking are within - // the bounds of the image. - all_in &= ((best_mv->row + ss[i].mv.row) > x->mv_limits.row_min); - all_in &= ((best_mv->row + ss[i + 1].mv.row) < x->mv_limits.row_max); - all_in &= ((best_mv->col + ss[i + 2].mv.col) > x->mv_limits.col_min); - all_in &= ((best_mv->col + ss[i + 3].mv.col) < x->mv_limits.col_max); - - // If all the pixels are within the bounds we don't check whether the - // search point is valid in this loop, otherwise we check each point - // for validity.. 
- if (all_in) { - unsigned int sad_array[4]; - - for (j = 0; j < cfg->searches_per_step; j += 4) { - unsigned char const *block_offset[4]; - - for (t = 0; t < 4; t++) - block_offset[t] = ss[i + t].offset + best_address; - - fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, - sad_array); - - for (t = 0; t < 4; t++, i++) { - if (sad_array[t] < bestsad) { - const MV this_mv = { best_mv->row + ss[i].mv.row, - best_mv->col + ss[i].mv.col }; - sad_array[t] += - mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); - if (sad_array[t] < bestsad) { - bestsad = sad_array[t]; - best_site = i; - } - } - } - } - } else { - for (j = 0; j < cfg->searches_per_step; j++) { - // Trap illegal vectors - const MV this_mv = { best_mv->row + ss[i].mv.row, - best_mv->col + ss[i].mv.col }; - - if (is_mv_in(&x->mv_limits, &this_mv)) { - const uint8_t *const check_here = ss[i].offset + best_address; - unsigned int thissad = - fn_ptr->sdf(what, what_stride, check_here, in_what_stride); - - if (thissad < bestsad) { - thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); - if (thissad < bestsad) { - bestsad = thissad; - best_site = i; - } - } - } - i++; - } - } - if (best_site != last_site) { - x->second_best_mv.as_mv = *best_mv; - best_mv->row += ss[best_site].mv.row; - best_mv->col += ss[best_site].mv.col; - best_address += ss[best_site].offset; - last_site = best_site; -#if defined(NEW_DIAMOND_SEARCH) - while (1) { - const MV this_mv = { best_mv->row + ss[best_site].mv.row, - best_mv->col + ss[best_site].mv.col }; - if (is_mv_in(&x->mv_limits, &this_mv)) { - const uint8_t *const check_here = ss[best_site].offset + best_address; - unsigned int thissad = - fn_ptr->sdf(what, what_stride, check_here, in_what_stride); - if (thissad < bestsad) { - thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); - if (thissad < bestsad) { - bestsad = thissad; - best_mv->row += ss[best_site].mv.row; - best_mv->col += ss[best_site].mv.col; - best_address += 
ss[best_site].offset; - continue; - } - } - } - break; - } -#endif - } else if (best_address == in_what) { - (*num00)++; - } - } - return bestsad; -} - -/* do_refine: If last step (1-away) of n-step search doesn't pick the center - point as the best match, we will do a final 1-away diamond - refining search */ -static int full_pixel_diamond(const AV1_COMP *const cpi, MACROBLOCK *x, - MV *mvp_full, int step_param, int sadpb, - int further_steps, int do_refine, int *cost_list, - const aom_variance_fn_ptr_t *fn_ptr, - const MV *ref_mv) { - MV temp_mv; - int thissme, n, num00 = 0; - int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv, - step_param, sadpb, &n, fn_ptr, ref_mv); - if (bestsme < INT_MAX) - bestsme = av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); - x->best_mv.as_mv = temp_mv; - - // If there won't be more n-step search, check to see if refining search is - // needed. - if (n > further_steps) do_refine = 0; - - while (n < further_steps) { - ++n; - - if (num00) { - num00--; - } else { - thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv, - step_param + n, sadpb, &num00, fn_ptr, - ref_mv); - if (thissme < INT_MAX) - thissme = av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); - - // check to see if refining search is needed. - if (num00 > further_steps - n) do_refine = 0; - - if (thissme < bestsme) { - bestsme = thissme; - x->best_mv.as_mv = temp_mv; - } - } - } - - // final 1-away diamond refining search - if (do_refine) { - const int search_range = 8; - MV best_mv = x->best_mv.as_mv; - thissme = av1_refining_search_sad(x, &best_mv, sadpb, search_range, fn_ptr, - ref_mv); - if (thissme < INT_MAX) - thissme = av1_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1); - if (thissme < bestsme) { - bestsme = thissme; - x->best_mv.as_mv = best_mv; - } - } - - // Return cost list. 
- if (cost_list) { - calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, &x->best_mv.as_mv, cost_list); - } - return bestsme; -} - -#define MIN_RANGE 7 -#define MAX_RANGE 256 -#define MIN_INTERVAL 1 -// Runs an limited range exhaustive mesh search using a pattern set -// according to the encode speed profile. -static int full_pixel_exhaustive(const AV1_COMP *const cpi, MACROBLOCK *x, - const MV *centre_mv_full, int sadpb, - int *cost_list, - const aom_variance_fn_ptr_t *fn_ptr, - const MV *ref_mv, MV *dst_mv) { - const SPEED_FEATURES *const sf = &cpi->sf; - MV temp_mv = { centre_mv_full->row, centre_mv_full->col }; - MV f_ref_mv = { ref_mv->row >> 3, ref_mv->col >> 3 }; - int bestsme; - int i; - int interval = sf->mesh_patterns[0].interval; - int range = sf->mesh_patterns[0].range; - int baseline_interval_divisor; - - // Keep track of number of exhaustive calls (this frame in this thread). - ++(*x->ex_search_count_ptr); - - // Trap illegal values for interval and range for this function. - if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) || - (interval > range)) - return INT_MAX; - - baseline_interval_divisor = range / interval; - - // Check size of proposed first range against magnitude of the centre - // value used as a starting point. - range = AOMMAX(range, (5 * AOMMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4); - range = AOMMIN(range, MAX_RANGE); - interval = AOMMAX(interval, range / baseline_interval_divisor); - - // initial search - bestsme = exhuastive_mesh_search(x, &f_ref_mv, &temp_mv, range, interval, - sadpb, fn_ptr, &temp_mv); - - if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) { - // Progressive searches with range and step size decreasing each time - // till we reach a step size of 1. Then break out. 
- for (i = 1; i < MAX_MESH_STEP; ++i) { - // First pass with coarser step and longer range - bestsme = exhuastive_mesh_search( - x, &f_ref_mv, &temp_mv, sf->mesh_patterns[i].range, - sf->mesh_patterns[i].interval, sadpb, fn_ptr, &temp_mv); - - if (sf->mesh_patterns[i].interval == 1) break; - } - } - - if (bestsme < INT_MAX) - bestsme = av1_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); - *dst_mv = temp_mv; - - // Return cost list. - if (cost_list) { - calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list); - } - return bestsme; -} - -int av1_refining_search_sad(MACROBLOCK *x, MV *ref_mv, int error_per_bit, - int search_range, - const aom_variance_fn_ptr_t *fn_ptr, - const MV *center_mv) { - const MACROBLOCKD *const xd = &x->e_mbd; - const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } }; - const struct buf_2d *const what = &x->plane[0].src; - const struct buf_2d *const in_what = &xd->plane[0].pre[0]; - const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; - const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv); - unsigned int best_sad = - fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride) + - mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); - int i, j; - - for (i = 0; i < search_range; i++) { - int best_site = -1; - const int all_in = ((ref_mv->row - 1) > x->mv_limits.row_min) & - ((ref_mv->row + 1) < x->mv_limits.row_max) & - ((ref_mv->col - 1) > x->mv_limits.col_min) & - ((ref_mv->col + 1) < x->mv_limits.col_max); - - if (all_in) { - unsigned int sads[4]; - const uint8_t *const positions[4] = { best_address - in_what->stride, - best_address - 1, best_address + 1, - best_address + in_what->stride }; - - fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads); - - for (j = 0; j < 4; ++j) { - if (sads[j] < best_sad) { - const MV mv = { ref_mv->row + neighbors[j].row, - ref_mv->col + neighbors[j].col }; - sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); - if (sads[j] 
< best_sad) { - best_sad = sads[j]; - best_site = j; - } - } - } - } else { - for (j = 0; j < 4; ++j) { - const MV mv = { ref_mv->row + neighbors[j].row, - ref_mv->col + neighbors[j].col }; - - if (is_mv_in(&x->mv_limits, &mv)) { - unsigned int sad = - fn_ptr->sdf(what->buf, what->stride, - get_buf_from_mv(in_what, &mv), in_what->stride); - if (sad < best_sad) { - sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); - if (sad < best_sad) { - best_sad = sad; - best_site = j; - } - } - } - } - } - - if (best_site == -1) { - break; - } else { - x->second_best_mv.as_mv = *ref_mv; - ref_mv->row += neighbors[best_site].row; - ref_mv->col += neighbors[best_site].col; - best_address = get_buf_from_mv(in_what, ref_mv); - } - } - - return best_sad; -} - -// This function is called when we do joint motion search in comp_inter_inter -// mode, or when searching for one component of an ext-inter compound mode. -int av1_refining_search_8p_c(MACROBLOCK *x, int error_per_bit, int search_range, - const aom_variance_fn_ptr_t *fn_ptr, - const uint8_t *mask, int mask_stride, - int invert_mask, const MV *center_mv, - const uint8_t *second_pred) { - static const search_neighbors neighbors[8] = { - { { -1, 0 }, -1 * SEARCH_GRID_STRIDE_8P + 0 }, - { { 0, -1 }, 0 * SEARCH_GRID_STRIDE_8P - 1 }, - { { 0, 1 }, 0 * SEARCH_GRID_STRIDE_8P + 1 }, - { { 1, 0 }, 1 * SEARCH_GRID_STRIDE_8P + 0 }, - { { -1, -1 }, -1 * SEARCH_GRID_STRIDE_8P - 1 }, - { { 1, -1 }, 1 * SEARCH_GRID_STRIDE_8P - 1 }, - { { -1, 1 }, -1 * SEARCH_GRID_STRIDE_8P + 1 }, - { { 1, 1 }, 1 * SEARCH_GRID_STRIDE_8P + 1 } - }; - const MACROBLOCKD *const xd = &x->e_mbd; - const struct buf_2d *const what = &x->plane[0].src; - const struct buf_2d *const in_what = &xd->plane[0].pre[0]; - const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; - MV *best_mv = &x->best_mv.as_mv; - unsigned int best_sad = INT_MAX; - int i, j; - uint8_t do_refine_search_grid[SEARCH_GRID_STRIDE_8P * SEARCH_GRID_STRIDE_8P] = - { 0 }; - int 
grid_center = SEARCH_GRID_CENTER_8P; - int grid_coord = grid_center; - - clamp_mv(best_mv, x->mv_limits.col_min, x->mv_limits.col_max, - x->mv_limits.row_min, x->mv_limits.row_max); - if (mask) { - best_sad = fn_ptr->msdf(what->buf, what->stride, - get_buf_from_mv(in_what, best_mv), in_what->stride, - second_pred, mask, mask_stride, invert_mask) + - mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit); - } else { - if (xd->jcp_param.use_jnt_comp_avg) - best_sad = fn_ptr->jsdaf(what->buf, what->stride, - get_buf_from_mv(in_what, best_mv), - in_what->stride, second_pred, &xd->jcp_param) + - mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit); - else - best_sad = fn_ptr->sdaf(what->buf, what->stride, - get_buf_from_mv(in_what, best_mv), - in_what->stride, second_pred) + - mvsad_err_cost(x, best_mv, &fcenter_mv, error_per_bit); - } - - do_refine_search_grid[grid_coord] = 1; - - for (i = 0; i < search_range; ++i) { - int best_site = -1; - - for (j = 0; j < 8; ++j) { - grid_coord = grid_center + neighbors[j].coord_offset; - if (do_refine_search_grid[grid_coord] == 1) { - continue; - } - const MV mv = { best_mv->row + neighbors[j].coord.row, - best_mv->col + neighbors[j].coord.col }; - - do_refine_search_grid[grid_coord] = 1; - if (is_mv_in(&x->mv_limits, &mv)) { - unsigned int sad; - if (mask) { - sad = fn_ptr->msdf(what->buf, what->stride, - get_buf_from_mv(in_what, &mv), in_what->stride, - second_pred, mask, mask_stride, invert_mask); - } else { - if (xd->jcp_param.use_jnt_comp_avg) - sad = fn_ptr->jsdaf(what->buf, what->stride, - get_buf_from_mv(in_what, &mv), in_what->stride, - second_pred, &xd->jcp_param); - else - sad = fn_ptr->sdaf(what->buf, what->stride, - get_buf_from_mv(in_what, &mv), in_what->stride, - second_pred); - } - if (sad < best_sad) { - sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); - if (sad < best_sad) { - best_sad = sad; - best_site = j; - } - } - } - } - - if (best_site == -1) { - break; - } else { - best_mv->row += 
neighbors[best_site].coord.row; - best_mv->col += neighbors[best_site].coord.col; - grid_center += neighbors[best_site].coord_offset; - } - } - return best_sad; -} - -#define MIN_EX_SEARCH_LIMIT 128 -static int is_exhaustive_allowed(const AV1_COMP *const cpi, MACROBLOCK *x) { - const SPEED_FEATURES *const sf = &cpi->sf; - const int max_ex = - AOMMAX(MIN_EX_SEARCH_LIMIT, - (*x->m_search_count_ptr * sf->max_exaustive_pct) / 100); - - return sf->allow_exhaustive_searches && - (sf->exhaustive_searches_thresh < INT_MAX) && - (*x->ex_search_count_ptr <= max_ex) && !cpi->rc.is_src_frame_alt_ref; -} - -int av1_full_pixel_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, - MV *mvp_full, int step_param, int method, - int run_mesh_search, int error_per_bit, - int *cost_list, const MV *ref_mv, int var_max, int rd, - int x_pos, int y_pos, int intra) { - const SPEED_FEATURES *const sf = &cpi->sf; - const aom_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize]; - int var = 0; - - if (cost_list) { - cost_list[0] = INT_MAX; - cost_list[1] = INT_MAX; - cost_list[2] = INT_MAX; - cost_list[3] = INT_MAX; - cost_list[4] = INT_MAX; - } - - // Keep track of number of searches (this frame in this thread). 
- ++(*x->m_search_count_ptr); - - switch (method) { - case FAST_DIAMOND: - var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0, - cost_list, fn_ptr, 1, ref_mv); - break; - case FAST_HEX: - var = fast_hex_search(x, mvp_full, step_param, error_per_bit, 0, - cost_list, fn_ptr, 1, ref_mv); - break; - case HEX: - var = av1_hex_search(x, mvp_full, step_param, error_per_bit, 1, cost_list, - fn_ptr, 1, ref_mv); - break; - case SQUARE: - var = square_search(x, mvp_full, step_param, error_per_bit, 1, cost_list, - fn_ptr, 1, ref_mv); - break; - case BIGDIA: - var = bigdia_search(x, mvp_full, step_param, error_per_bit, 1, cost_list, - fn_ptr, 1, ref_mv); - break; - case NSTEP: - var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit, - MAX_MVSEARCH_STEPS - 1 - step_param, 1, - cost_list, fn_ptr, ref_mv); - - // Should we allow a follow on exhaustive search? - if (is_exhaustive_allowed(cpi, x)) { - int exhuastive_thr = sf->exhaustive_searches_thresh; - exhuastive_thr >>= - 10 - (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]); - - // Threshold variance for an exhaustive full search. - if (var > exhuastive_thr) { - int var_ex; - MV tmp_mv_ex; - var_ex = - full_pixel_exhaustive(cpi, x, &x->best_mv.as_mv, error_per_bit, - cost_list, fn_ptr, ref_mv, &tmp_mv_ex); - - if (var_ex < var) { - var = var_ex; - x->best_mv.as_mv = tmp_mv_ex; - } - } - } - break; - default: assert(0 && "Invalid search method."); - } - - // Should we allow a follow on exhaustive search? - if (!run_mesh_search) { - if (method == NSTEP) { - if (is_exhaustive_allowed(cpi, x)) { - int exhuastive_thr = sf->exhaustive_searches_thresh; - exhuastive_thr >>= - 10 - (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]); - // Threshold variance for an exhaustive full search. 
- if (var > exhuastive_thr) run_mesh_search = 1; - } - } - } - - if (run_mesh_search) { - int var_ex; - MV tmp_mv_ex; - var_ex = full_pixel_exhaustive(cpi, x, &x->best_mv.as_mv, error_per_bit, - cost_list, fn_ptr, ref_mv, &tmp_mv_ex); - if (var_ex < var) { - var = var_ex; - x->best_mv.as_mv = tmp_mv_ex; - } - } - - if (method != NSTEP && rd && var < var_max) - var = av1_get_mvpred_var(x, &x->best_mv.as_mv, ref_mv, fn_ptr, 1); - - do { - if (!intra || !av1_use_hash_me(&cpi->common)) break; - - // already single ME - // get block size and original buffer of current block - const int block_height = block_size_high[bsize]; - const int block_width = block_size_wide[bsize]; - if (block_height == block_width && x_pos >= 0 && y_pos >= 0) { - if (block_width == 4 || block_width == 8 || block_width == 16 || - block_width == 32 || block_width == 64 || block_width == 128) { - uint8_t *what = x->plane[0].src.buf; - const int what_stride = x->plane[0].src.stride; - uint32_t hash_value1, hash_value2; - MV best_hash_mv; - int best_hash_cost = INT_MAX; - - // for the hashMap - hash_table *ref_frame_hash = - intra - ? &cpi->common.cur_frame->hash_table - : av1_get_ref_frame_hash_map(cpi, x->e_mbd.mi[0]->ref_frame[0]); - - av1_get_block_hash_value( - what, what_stride, block_width, &hash_value1, &hash_value2, - x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH, x); - - const int count = av1_hash_table_count(ref_frame_hash, hash_value1); - // for intra, at lest one matching can be found, itself. - if (count <= (intra ? 1 : 0)) { - break; - } - - Iterator iterator = - av1_hash_get_first_iterator(ref_frame_hash, hash_value1); - for (int i = 0; i < count; i++, iterator_increment(&iterator)) { - block_hash ref_block_hash = *(block_hash *)(iterator_get(&iterator)); - if (hash_value2 == ref_block_hash.hash_value2) { - // For intra, make sure the prediction is from valid area. 
- if (intra) { - const int mi_col = x_pos / MI_SIZE; - const int mi_row = y_pos / MI_SIZE; - const MV dv = { 8 * (ref_block_hash.y - y_pos), - 8 * (ref_block_hash.x - x_pos) }; - if (!av1_is_dv_valid(dv, &cpi->common, &x->e_mbd, mi_row, mi_col, - bsize, cpi->common.seq_params.mib_size_log2)) - continue; - } - MV hash_mv; - hash_mv.col = ref_block_hash.x - x_pos; - hash_mv.row = ref_block_hash.y - y_pos; - if (!is_mv_in(&x->mv_limits, &hash_mv)) continue; - const int refCost = - av1_get_mvpred_var(x, &hash_mv, ref_mv, fn_ptr, 1); - if (refCost < best_hash_cost) { - best_hash_cost = refCost; - best_hash_mv = hash_mv; - } - } - } - if (best_hash_cost < var) { - x->second_best_mv = x->best_mv; - x->best_mv.as_mv = best_hash_mv; - var = best_hash_cost; - } - } - } - } while (0); - - return var; -} - -/* returns subpixel variance error function */ -#define DIST(r, c) \ - vfp->osvf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, mask, &sse) - -/* checks if (r, c) has better score than previous best */ -#define MVC(r, c) \ - (unsigned int)(mvcost \ - ? 
((mvjcost[((r) != rr) * 2 + ((c) != rc)] + \ - mvcost[0][((r)-rr)] + (int64_t)mvcost[1][((c)-rc)]) * \ - error_per_bit + \ - 4096) >> \ - 13 \ - : 0) - -#define CHECK_BETTER(v, r, c) \ - if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ - thismse = (DIST(r, c)); \ - if ((v = MVC(r, c) + thismse) < besterr) { \ - besterr = v; \ - br = r; \ - bc = c; \ - *distortion = thismse; \ - *sse1 = sse; \ - } \ - } else { \ - v = INT_MAX; \ - } - -#undef CHECK_BETTER0 -#define CHECK_BETTER0(v, r, c) CHECK_BETTER(v, r, c) - -#undef CHECK_BETTER1 -#define CHECK_BETTER1(v, r, c) \ - if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ - MV this_mv = { r, c }; \ - thismse = upsampled_obmc_pref_error(xd, cm, mi_row, mi_col, &this_mv, \ - mask, vfp, z, pre(y, y_stride, r, c), \ - y_stride, sp(c), sp(r), w, h, &sse, \ - use_accurate_subpel_search); \ - if ((v = MVC(r, c) + thismse) < besterr) { \ - besterr = v; \ - br = r; \ - bc = c; \ - *distortion = thismse; \ - *sse1 = sse; \ - } \ - } else { \ - v = INT_MAX; \ - } - -static unsigned int setup_obmc_center_error( - const int32_t *mask, const MV *bestmv, const MV *ref_mv, int error_per_bit, - const aom_variance_fn_ptr_t *vfp, const int32_t *const wsrc, - const uint8_t *const y, int y_stride, int offset, int *mvjcost, - int *mvcost[2], unsigned int *sse1, int *distortion) { - unsigned int besterr; - besterr = vfp->ovf(y + offset, y_stride, wsrc, mask, sse1); - *distortion = besterr; - besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); - return besterr; -} - -static int upsampled_obmc_pref_error( - MACROBLOCKD *xd, const AV1_COMMON *const cm, int mi_row, int mi_col, - const MV *const mv, const int32_t *mask, const aom_variance_fn_ptr_t *vfp, - const int32_t *const wsrc, const uint8_t *const y, int y_stride, - int subpel_x_q3, int subpel_y_q3, int w, int h, unsigned int *sse, - int subpel_search) { - unsigned int besterr; - - DECLARE_ALIGNED(16, uint8_t, pred[2 * MAX_SB_SQUARE]); - if 
(xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - uint8_t *pred8 = CONVERT_TO_BYTEPTR(pred); - aom_highbd_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred8, w, h, - subpel_x_q3, subpel_y_q3, y, y_stride, xd->bd, - subpel_search); - besterr = vfp->ovf(pred8, w, wsrc, mask, sse); - } else { - aom_upsampled_pred(xd, cm, mi_row, mi_col, mv, pred, w, h, subpel_x_q3, - subpel_y_q3, y, y_stride, subpel_search); - - besterr = vfp->ovf(pred, w, wsrc, mask, sse); - } - return besterr; -} - -static unsigned int upsampled_setup_obmc_center_error( - MACROBLOCKD *xd, const AV1_COMMON *const cm, int mi_row, int mi_col, - const int32_t *mask, const MV *bestmv, const MV *ref_mv, int error_per_bit, - const aom_variance_fn_ptr_t *vfp, const int32_t *const wsrc, - const uint8_t *const y, int y_stride, int w, int h, int offset, - int *mvjcost, int *mvcost[2], unsigned int *sse1, int *distortion, - int subpel_search) { - unsigned int besterr = upsampled_obmc_pref_error( - xd, cm, mi_row, mi_col, bestmv, mask, vfp, wsrc, y + offset, y_stride, 0, - 0, w, h, sse1, subpel_search); - *distortion = besterr; - besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); - return besterr; -} - -int av1_find_best_obmc_sub_pixel_tree_up( - MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col, - MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit, - const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, - int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1, - int is_second, int use_accurate_subpel_search) { - const int32_t *wsrc = x->wsrc_buf; - const int32_t *mask = x->mask_buf; - const int *const z = wsrc; - const int *const src_address = z; - MACROBLOCKD *xd = &x->e_mbd; - struct macroblockd_plane *const pd = &xd->plane[0]; - MB_MODE_INFO *mbmi = xd->mi[0]; - unsigned int besterr = INT_MAX; - unsigned int sse; - unsigned int thismse; - - int rr = ref_mv->row; - int rc = ref_mv->col; - int br = bestmv->row * 8; - int bc = bestmv->col 
* 8; - int hstep = 4; - int iter; - int round = 3 - forced_stop; - int tr = br; - int tc = bc; - const MV *search_step = search_step_table; - int idx, best_idx = -1; - unsigned int cost_array[5]; - int kr, kc; - const int w = block_size_wide[mbmi->sb_type]; - const int h = block_size_high[mbmi->sb_type]; - int offset; - int y_stride; - const uint8_t *y; - - int minc, maxc, minr, maxr; - - set_subpel_mv_search_range(&x->mv_limits, &minc, &maxc, &minr, &maxr, ref_mv); - - y = pd->pre[is_second].buf; - y_stride = pd->pre[is_second].stride; - offset = bestmv->row * y_stride + bestmv->col; - - if (!allow_hp) - if (round == 3) round = 2; - - bestmv->row *= 8; - bestmv->col *= 8; - // use_accurate_subpel_search can be 0 or 1 or 2 - if (use_accurate_subpel_search) - besterr = upsampled_setup_obmc_center_error( - xd, cm, mi_row, mi_col, mask, bestmv, ref_mv, error_per_bit, vfp, z, y, - y_stride, w, h, offset, mvjcost, mvcost, sse1, distortion, - use_accurate_subpel_search); - else - besterr = setup_obmc_center_error(mask, bestmv, ref_mv, error_per_bit, vfp, - z, y, y_stride, offset, mvjcost, mvcost, - sse1, distortion); - - for (iter = 0; iter < round; ++iter) { - // Check vertical and horizontal sub-pixel positions. 
- for (idx = 0; idx < 4; ++idx) { - tr = br + search_step[idx].row; - tc = bc + search_step[idx].col; - if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { - MV this_mv = { tr, tc }; - if (use_accurate_subpel_search) { - thismse = upsampled_obmc_pref_error( - xd, cm, mi_row, mi_col, &this_mv, mask, vfp, src_address, - pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), w, h, &sse, - use_accurate_subpel_search); - } else { - thismse = vfp->osvf(pre(y, y_stride, tr, tc), y_stride, sp(tc), - sp(tr), src_address, mask, &sse); - } - - cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, - mvcost, error_per_bit); - if (cost_array[idx] < besterr) { - best_idx = idx; - besterr = cost_array[idx]; - *distortion = thismse; - *sse1 = sse; - } - } else { - cost_array[idx] = INT_MAX; - } - } - - // Check diagonal sub-pixel position - kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep); - kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep); - - tc = bc + kc; - tr = br + kr; - if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { - MV this_mv = { tr, tc }; - - if (use_accurate_subpel_search) { - thismse = upsampled_obmc_pref_error( - xd, cm, mi_row, mi_col, &this_mv, mask, vfp, src_address, - pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), w, h, &sse, - use_accurate_subpel_search); - } else { - thismse = vfp->osvf(pre(y, y_stride, tr, tc), y_stride, sp(tc), sp(tr), - src_address, mask, &sse); - } - - cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, - error_per_bit); - - if (cost_array[4] < besterr) { - best_idx = 4; - besterr = cost_array[4]; - *distortion = thismse; - *sse1 = sse; - } - } else { - cost_array[idx] = INT_MAX; - } - - if (best_idx < 4 && best_idx >= 0) { - br += search_step[best_idx].row; - bc += search_step[best_idx].col; - } else if (best_idx == 4) { - br = tr; - bc = tc; - } - - if (iters_per_step > 1 && best_idx != -1) { - if (use_accurate_subpel_search) { - SECOND_LEVEL_CHECKS_BEST(1); - } else { - 
SECOND_LEVEL_CHECKS_BEST(0); - } - } - - tr = br; - tc = bc; - - search_step += 4; - hstep >>= 1; - best_idx = -1; - } - - // These lines insure static analysis doesn't warn that - // tr and tc aren't used after the above point. - (void)tr; - (void)tc; - - bestmv->row = br; - bestmv->col = bc; - - return besterr; -} - -#undef DIST -#undef MVC -#undef CHECK_BETTER - -static int get_obmc_mvpred_var(const MACROBLOCK *x, const int32_t *wsrc, - const int32_t *mask, const MV *best_mv, - const MV *center_mv, - const aom_variance_fn_ptr_t *vfp, int use_mvcost, - int is_second) { - const MACROBLOCKD *const xd = &x->e_mbd; - const struct buf_2d *const in_what = &xd->plane[0].pre[is_second]; - const MV mv = { best_mv->row * 8, best_mv->col * 8 }; - unsigned int unused; - - return vfp->ovf(get_buf_from_mv(in_what, best_mv), in_what->stride, wsrc, - mask, &unused) + - (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost, - x->errorperbit) - : 0); -} - -int obmc_refining_search_sad(const MACROBLOCK *x, const int32_t *wsrc, - const int32_t *mask, MV *ref_mv, int error_per_bit, - int search_range, - const aom_variance_fn_ptr_t *fn_ptr, - const MV *center_mv, int is_second) { - const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } }; - const MACROBLOCKD *const xd = &x->e_mbd; - const struct buf_2d *const in_what = &xd->plane[0].pre[is_second]; - const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; - unsigned int best_sad = fn_ptr->osdf(get_buf_from_mv(in_what, ref_mv), - in_what->stride, wsrc, mask) + - mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); - int i, j; - - for (i = 0; i < search_range; i++) { - int best_site = -1; - - for (j = 0; j < 4; j++) { - const MV mv = { ref_mv->row + neighbors[j].row, - ref_mv->col + neighbors[j].col }; - if (is_mv_in(&x->mv_limits, &mv)) { - unsigned int sad = fn_ptr->osdf(get_buf_from_mv(in_what, &mv), - in_what->stride, wsrc, mask); - if (sad < best_sad) { - sad += mvsad_err_cost(x, &mv, 
&fcenter_mv, error_per_bit); - if (sad < best_sad) { - best_sad = sad; - best_site = j; - } - } - } - } - - if (best_site == -1) { - break; - } else { - ref_mv->row += neighbors[best_site].row; - ref_mv->col += neighbors[best_site].col; - } - } - return best_sad; -} - -int obmc_diamond_search_sad(const MACROBLOCK *x, const search_site_config *cfg, - const int32_t *wsrc, const int32_t *mask, - MV *ref_mv, MV *best_mv, int search_param, - int sad_per_bit, int *num00, - const aom_variance_fn_ptr_t *fn_ptr, - const MV *center_mv, int is_second) { - const MACROBLOCKD *const xd = &x->e_mbd; - const struct buf_2d *const in_what = &xd->plane[0].pre[is_second]; - // search_param determines the length of the initial step and hence the number - // of iterations - // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = - // (MAX_FIRST_STEP/4) pel... etc. - const search_site *const ss = &cfg->ss[search_param * cfg->searches_per_step]; - const int tot_steps = (cfg->ss_count / cfg->searches_per_step) - search_param; - const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; - const uint8_t *best_address, *in_what_ref; - int best_sad = INT_MAX; - int best_site = 0; - int last_site = 0; - int i, j, step; - - clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max, - x->mv_limits.row_min, x->mv_limits.row_max); - in_what_ref = in_what->buf + ref_mv->row * in_what->stride + ref_mv->col; - best_address = in_what_ref; - *num00 = 0; - *best_mv = *ref_mv; - - // Check the starting position - best_sad = fn_ptr->osdf(best_address, in_what->stride, wsrc, mask) + - mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); - - i = 1; - - for (step = 0; step < tot_steps; step++) { - for (j = 0; j < cfg->searches_per_step; j++) { - const MV mv = { best_mv->row + ss[i].mv.row, - best_mv->col + ss[i].mv.col }; - if (is_mv_in(&x->mv_limits, &mv)) { - int sad = fn_ptr->osdf(best_address + ss[i].offset, in_what->stride, - wsrc, mask); - if (sad < best_sad) { - sad += 
mvsad_err_cost(x, &mv, &fcenter_mv, sad_per_bit); - if (sad < best_sad) { - best_sad = sad; - best_site = i; - } - } - } - - i++; - } - - if (best_site != last_site) { - best_mv->row += ss[best_site].mv.row; - best_mv->col += ss[best_site].mv.col; - best_address += ss[best_site].offset; - last_site = best_site; -#if defined(NEW_DIAMOND_SEARCH) - while (1) { - const MV this_mv = { best_mv->row + ss[best_site].mv.row, - best_mv->col + ss[best_site].mv.col }; - if (is_mv_in(&x->mv_limits, &this_mv)) { - int sad = fn_ptr->osdf(best_address + ss[best_site].offset, - in_what->stride, wsrc, mask); - if (sad < best_sad) { - sad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); - if (sad < best_sad) { - best_sad = sad; - best_mv->row += ss[best_site].mv.row; - best_mv->col += ss[best_site].mv.col; - best_address += ss[best_site].offset; - continue; - } - } - } - break; - } -#endif - } else if (best_address == in_what_ref) { - (*num00)++; - } - } - return best_sad; -} - -static int obmc_full_pixel_diamond(const AV1_COMP *cpi, MACROBLOCK *x, - MV *mvp_full, int step_param, int sadpb, - int further_steps, int do_refine, - const aom_variance_fn_ptr_t *fn_ptr, - const MV *ref_mv, MV *dst_mv, - int is_second) { - const int32_t *wsrc = x->wsrc_buf; - const int32_t *mask = x->mask_buf; - MV temp_mv; - int thissme, n, num00 = 0; - int bestsme = - obmc_diamond_search_sad(x, &cpi->ss_cfg, wsrc, mask, mvp_full, &temp_mv, - step_param, sadpb, &n, fn_ptr, ref_mv, is_second); - if (bestsme < INT_MAX) - bestsme = get_obmc_mvpred_var(x, wsrc, mask, &temp_mv, ref_mv, fn_ptr, 1, - is_second); - *dst_mv = temp_mv; - - // If there won't be more n-step search, check to see if refining search is - // needed. 
- if (n > further_steps) do_refine = 0; - - while (n < further_steps) { - ++n; - - if (num00) { - num00--; - } else { - thissme = obmc_diamond_search_sad(x, &cpi->ss_cfg, wsrc, mask, mvp_full, - &temp_mv, step_param + n, sadpb, &num00, - fn_ptr, ref_mv, is_second); - if (thissme < INT_MAX) - thissme = get_obmc_mvpred_var(x, wsrc, mask, &temp_mv, ref_mv, fn_ptr, - 1, is_second); - - // check to see if refining search is needed. - if (num00 > further_steps - n) do_refine = 0; - - if (thissme < bestsme) { - bestsme = thissme; - *dst_mv = temp_mv; - } - } - } - - // final 1-away diamond refining search - if (do_refine) { - const int search_range = 8; - MV best_mv = *dst_mv; - thissme = obmc_refining_search_sad(x, wsrc, mask, &best_mv, sadpb, - search_range, fn_ptr, ref_mv, is_second); - if (thissme < INT_MAX) - thissme = get_obmc_mvpred_var(x, wsrc, mask, &best_mv, ref_mv, fn_ptr, 1, - is_second); - if (thissme < bestsme) { - bestsme = thissme; - *dst_mv = best_mv; - } - } - return bestsme; -} - -int av1_obmc_full_pixel_search(const AV1_COMP *cpi, MACROBLOCK *x, MV *mvp_full, - int step_param, int sadpb, int further_steps, - int do_refine, - const aom_variance_fn_ptr_t *fn_ptr, - const MV *ref_mv, MV *dst_mv, int is_second) { - if (cpi->sf.obmc_full_pixel_search_level == 0) { - return obmc_full_pixel_diamond(cpi, x, mvp_full, step_param, sadpb, - further_steps, do_refine, fn_ptr, ref_mv, - dst_mv, is_second); - } else { - const int32_t *wsrc = x->wsrc_buf; - const int32_t *mask = x->mask_buf; - const int search_range = 8; - *dst_mv = *mvp_full; - clamp_mv(dst_mv, x->mv_limits.col_min, x->mv_limits.col_max, - x->mv_limits.row_min, x->mv_limits.row_max); - int thissme = obmc_refining_search_sad( - x, wsrc, mask, dst_mv, sadpb, search_range, fn_ptr, ref_mv, is_second); - if (thissme < INT_MAX) - thissme = get_obmc_mvpred_var(x, wsrc, mask, dst_mv, ref_mv, fn_ptr, 1, - is_second); - return thissme; - } -} - -// Note(yunqingwang): The following 2 functions are only used in 
the motion -// vector unit test, which return extreme motion vectors allowed by the MV -// limits. -#define COMMON_MV_TEST \ - SETUP_SUBPEL_SEARCH; \ - \ - (void)error_per_bit; \ - (void)vfp; \ - (void)src_address; \ - (void)src_stride; \ - (void)y; \ - (void)y_stride; \ - (void)second_pred; \ - (void)w; \ - (void)h; \ - (void)use_accurate_subpel_search; \ - (void)offset; \ - (void)mvjcost; \ - (void)mvcost; \ - (void)sse1; \ - (void)distortion; \ - \ - (void)halfiters; \ - (void)quarteriters; \ - (void)eighthiters; \ - (void)whichdir; \ - (void)forced_stop; \ - (void)hstep; \ - \ - (void)tr; \ - (void)tc; \ - (void)sse; \ - (void)thismse; \ - (void)cost_list; -// Return the maximum MV. -int av1_return_max_sub_pixel_mv(MACROBLOCK *x, const AV1_COMMON *const cm, - int mi_row, int mi_col, const MV *ref_mv, - int allow_hp, int error_per_bit, - const aom_variance_fn_ptr_t *vfp, - int forced_stop, int iters_per_step, - int *cost_list, int *mvjcost, int *mvcost[2], - int *distortion, unsigned int *sse1, - const uint8_t *second_pred, const uint8_t *mask, - int mask_stride, int invert_mask, int w, int h, - int use_accurate_subpel_search) { - COMMON_MV_TEST; - (void)mask; - (void)mask_stride; - (void)invert_mask; - (void)minr; - (void)minc; - - (void)cm; - (void)mi_row; - (void)mi_col; - - bestmv->row = maxr; - bestmv->col = maxc; - besterr = 0; - // In the sub-pel motion search, if hp is not used, then the last bit of mv - // has to be 0. - lower_mv_precision(bestmv, allow_hp, 0); - return besterr; -} -// Return the minimum MV. 
-int av1_return_min_sub_pixel_mv(MACROBLOCK *x, const AV1_COMMON *const cm, - int mi_row, int mi_col, const MV *ref_mv, - int allow_hp, int error_per_bit, - const aom_variance_fn_ptr_t *vfp, - int forced_stop, int iters_per_step, - int *cost_list, int *mvjcost, int *mvcost[2], - int *distortion, unsigned int *sse1, - const uint8_t *second_pred, const uint8_t *mask, - int mask_stride, int invert_mask, int w, int h, - int use_accurate_subpel_search) { - COMMON_MV_TEST; - (void)maxr; - (void)maxc; - (void)mask; - (void)mask_stride; - (void)invert_mask; - - (void)cm; - (void)mi_row; - (void)mi_col; - - bestmv->row = minr; - bestmv->col = minc; - besterr = 0; - // In the sub-pel motion search, if hp is not used, then the last bit of mv - // has to be 0. - lower_mv_precision(bestmv, allow_hp, 0); - return besterr; -} diff --git a/third_party/aom/av1/encoder/mcomp.h b/third_party/aom/av1/encoder/mcomp.h deleted file mode 100644 index a975218b0..000000000 --- a/third_party/aom/av1/encoder/mcomp.h +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_ENCODER_MCOMP_H_ -#define AOM_AV1_ENCODER_MCOMP_H_ - -#include "av1/encoder/block.h" -#include "aom_dsp/variance.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// The maximum number of steps in a step search given the largest -// allowed initial step -#define MAX_MVSEARCH_STEPS 11 -// Max full pel mv specified in the unit of full pixel -// Enable the use of motion vector in range [-1023, 1023]. 
-#define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS - 1)) - 1) -// Maximum size of the first step in full pel units -#define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS - 1)) -// Allowed motion vector pixel distance outside image border -// for Block_16x16 -#define BORDER_MV_PIXELS_B16 (16 + AOM_INTERP_EXTEND) - -#define SEARCH_RANGE_8P 3 -#define SEARCH_GRID_STRIDE_8P (2 * SEARCH_RANGE_8P + 1) -#define SEARCH_GRID_CENTER_8P \ - (SEARCH_RANGE_8P * SEARCH_GRID_STRIDE_8P + SEARCH_RANGE_8P) - -// motion search site -typedef struct search_site { - MV mv; - int offset; -} search_site; - -typedef struct search_site_config { - search_site ss[8 * MAX_MVSEARCH_STEPS + 1]; - int ss_count; - int searches_per_step; -} search_site_config; - -typedef struct { - MV coord; - int coord_offset; -} search_neighbors; - -void av1_init_dsmotion_compensation(search_site_config *cfg, int stride); -void av1_init3smotion_compensation(search_site_config *cfg, int stride); - -void av1_set_mv_search_range(MvLimits *mv_limits, const MV *mv); - -int av1_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost, - int *mvcost[2], int weight); - -// Utility to compute variance + MV rate cost for a given MV -int av1_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv, - const MV *center_mv, const aom_variance_fn_ptr_t *vfp, - int use_mvcost); -int av1_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv, - const MV *center_mv, const uint8_t *second_pred, - const aom_variance_fn_ptr_t *vfp, int use_mvcost); -int av1_get_mvpred_mask_var(const MACROBLOCK *x, const MV *best_mv, - const MV *center_mv, const uint8_t *second_pred, - const uint8_t *mask, int mask_stride, - int invert_mask, const aom_variance_fn_ptr_t *vfp, - int use_mvcost); - -struct AV1_COMP; -struct SPEED_FEATURES; - -int av1_init_search_range(int size); - -int av1_refining_search_sad(struct macroblock *x, MV *ref_mv, int sad_per_bit, - int distance, const aom_variance_fn_ptr_t *fn_ptr, - const MV *center_mv); - -// Runs sequence 
of diamond searches in smaller steps for RD. -int av1_full_pixel_diamond(const struct AV1_COMP *cpi, MACROBLOCK *x, - MV *mvp_full, int step_param, int sadpb, - int further_steps, int do_refine, int *cost_list, - const aom_variance_fn_ptr_t *fn_ptr, - const MV *ref_mv, MV *dst_mv); - -int av1_hex_search(MACROBLOCK *x, MV *start_mv, int search_param, - int sad_per_bit, int do_init_search, int *cost_list, - const aom_variance_fn_ptr_t *vfp, int use_mvcost, - const MV *center_mv); - -typedef int(fractional_mv_step_fp)( - MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col, - const MV *ref_mv, int allow_hp, int error_per_bit, - const aom_variance_fn_ptr_t *vfp, - int forced_stop, // 0 - full, 1 - qtr only, 2 - half only - int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], - int *distortion, unsigned int *sse1, const uint8_t *second_pred, - const uint8_t *mask, int mask_stride, int invert_mask, int w, int h, - int use_accurate_subpel_search); - -extern fractional_mv_step_fp av1_find_best_sub_pixel_tree; -extern fractional_mv_step_fp av1_find_best_sub_pixel_tree_pruned; -extern fractional_mv_step_fp av1_find_best_sub_pixel_tree_pruned_more; -extern fractional_mv_step_fp av1_find_best_sub_pixel_tree_pruned_evenmore; -extern fractional_mv_step_fp av1_return_max_sub_pixel_mv; -extern fractional_mv_step_fp av1_return_min_sub_pixel_mv; - -typedef int (*av1_full_search_fn_t)(const MACROBLOCK *x, const MV *ref_mv, - int sad_per_bit, int distance, - const aom_variance_fn_ptr_t *fn_ptr, - const MV *center_mv, MV *best_mv); - -typedef int (*av1_diamond_search_fn_t)( - MACROBLOCK *x, const search_site_config *cfg, MV *ref_mv, MV *best_mv, - int search_param, int sad_per_bit, int *num00, - const aom_variance_fn_ptr_t *fn_ptr, const MV *center_mv); - -int av1_refining_search_8p_c(MACROBLOCK *x, int error_per_bit, int search_range, - const aom_variance_fn_ptr_t *fn_ptr, - const uint8_t *mask, int mask_stride, - int invert_mask, const MV *center_mv, - const 
uint8_t *second_pred); - -int av1_full_pixel_search(const struct AV1_COMP *cpi, MACROBLOCK *x, - BLOCK_SIZE bsize, MV *mvp_full, int step_param, - int method, int run_mesh_search, int error_per_bit, - int *cost_list, const MV *ref_mv, int var_max, int rd, - int x_pos, int y_pos, int intra); - -int av1_obmc_full_pixel_search(const struct AV1_COMP *cpi, MACROBLOCK *x, - MV *mvp_full, int step_param, int sadpb, - int further_steps, int do_refine, - const aom_variance_fn_ptr_t *fn_ptr, - const MV *ref_mv, MV *dst_mv, int is_second); -int av1_find_best_obmc_sub_pixel_tree_up( - MACROBLOCK *x, const AV1_COMMON *const cm, int mi_row, int mi_col, - MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit, - const aom_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, - int *mvjcost, int *mvcost[2], int *distortion, unsigned int *sse1, - int is_second, int use_accurate_subpel_search); - -unsigned int av1_compute_motion_cost(const struct AV1_COMP *cpi, - MACROBLOCK *const x, BLOCK_SIZE bsize, - int mi_row, int mi_col, const MV *this_mv); -unsigned int av1_refine_warped_mv(const struct AV1_COMP *cpi, - MACROBLOCK *const x, BLOCK_SIZE bsize, - int mi_row, int mi_col, int *pts0, - int *pts_inref0, int total_samples); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_MCOMP_H_ diff --git a/third_party/aom/av1/encoder/mips/msa/error_msa.c b/third_party/aom/av1/encoder/mips/msa/error_msa.c deleted file mode 100644 index 2e86dee43..000000000 --- a/third_party/aom/av1/encoder/mips/msa/error_msa.c +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include "config/av1_rtcd.h" - -#include "aom_dsp/mips/macros_msa.h" - -#define BLOCK_ERROR_BLOCKSIZE_MSA(BSize) \ - static int64_t block_error_##BSize##size_msa( \ - const int16_t *coeff_ptr, const int16_t *dq_coeff_ptr, int64_t *ssz) { \ - int64_t err = 0; \ - uint32_t loop_cnt; \ - v8i16 coeff, dq_coeff, coeff_r_h, coeff_l_h; \ - v4i32 diff_r, diff_l, coeff_r_w, coeff_l_w; \ - v2i64 sq_coeff_r, sq_coeff_l; \ - v2i64 err0, err_dup0, err1, err_dup1; \ - \ - coeff = LD_SH(coeff_ptr); \ - dq_coeff = LD_SH(dq_coeff_ptr); \ - UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \ - ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \ - HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \ - DOTP_SW2_SD(coeff_r_w, coeff_l_w, coeff_r_w, coeff_l_w, sq_coeff_r, \ - sq_coeff_l); \ - DOTP_SW2_SD(diff_r, diff_l, diff_r, diff_l, err0, err1); \ - \ - coeff = LD_SH(coeff_ptr + 8); \ - dq_coeff = LD_SH(dq_coeff_ptr + 8); \ - UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \ - ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \ - HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \ - DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \ - DPADD_SD2_SD(diff_r, diff_l, err0, err1); \ - \ - coeff_ptr += 16; \ - dq_coeff_ptr += 16; \ - \ - for (loop_cnt = ((BSize >> 4) - 1); loop_cnt--;) { \ - coeff = LD_SH(coeff_ptr); \ - dq_coeff = LD_SH(dq_coeff_ptr); \ - UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \ - ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \ - HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \ - DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \ - DPADD_SD2_SD(diff_r, diff_l, err0, err1); \ - \ - coeff = LD_SH(coeff_ptr + 8); \ - dq_coeff = LD_SH(dq_coeff_ptr + 8); \ - UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \ - ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \ - 
HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \ - DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \ - DPADD_SD2_SD(diff_r, diff_l, err0, err1); \ - \ - coeff_ptr += 16; \ - dq_coeff_ptr += 16; \ - } \ - \ - err_dup0 = __msa_splati_d(sq_coeff_r, 1); \ - err_dup1 = __msa_splati_d(sq_coeff_l, 1); \ - sq_coeff_r += err_dup0; \ - sq_coeff_l += err_dup1; \ - *ssz = __msa_copy_s_d(sq_coeff_r, 0); \ - *ssz += __msa_copy_s_d(sq_coeff_l, 0); \ - \ - err_dup0 = __msa_splati_d(err0, 1); \ - err_dup1 = __msa_splati_d(err1, 1); \ - err0 += err_dup0; \ - err1 += err_dup1; \ - err = __msa_copy_s_d(err0, 0); \ - err += __msa_copy_s_d(err1, 0); \ - \ - return err; \ - } - -/* clang-format off */ -BLOCK_ERROR_BLOCKSIZE_MSA(16) -BLOCK_ERROR_BLOCKSIZE_MSA(64) -BLOCK_ERROR_BLOCKSIZE_MSA(256) -BLOCK_ERROR_BLOCKSIZE_MSA(1024) -/* clang-format on */ - -int64_t av1_block_error_msa(const tran_low_t *coeff_ptr, - const tran_low_t *dq_coeff_ptr, intptr_t blk_size, - int64_t *ssz) { - int64_t err; - const int16_t *coeff = (const int16_t *)coeff_ptr; - const int16_t *dq_coeff = (const int16_t *)dq_coeff_ptr; - - switch (blk_size) { - case 16: err = block_error_16size_msa(coeff, dq_coeff, ssz); break; - case 64: err = block_error_64size_msa(coeff, dq_coeff, ssz); break; - case 256: err = block_error_256size_msa(coeff, dq_coeff, ssz); break; - case 1024: err = block_error_1024size_msa(coeff, dq_coeff, ssz); break; - default: - err = av1_block_error_c(coeff_ptr, dq_coeff_ptr, blk_size, ssz); - break; - } - - return err; -} diff --git a/third_party/aom/av1/encoder/mips/msa/fdct4x4_msa.c b/third_party/aom/av1/encoder/mips/msa/fdct4x4_msa.c deleted file mode 100644 index 085c08bfb..000000000 --- a/third_party/aom/av1/encoder/mips/msa/fdct4x4_msa.c +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include - -#include "av1/common/enums.h" - -void av1_fwht4x4_msa(const int16_t *input, int16_t *output, - int32_t src_stride) { - v8i16 in0, in1, in2, in3, in4; - - LD_SH4(input, src_stride, in0, in1, in2, in3); - - in0 += in1; - in3 -= in2; - in4 = (in0 - in3) >> 1; - SUB2(in4, in1, in4, in2, in1, in2); - in0 -= in2; - in3 += in1; - - TRANSPOSE4x4_SH_SH(in0, in2, in3, in1, in0, in2, in3, in1); - - in0 += in2; - in1 -= in3; - in4 = (in0 - in1) >> 1; - SUB2(in4, in2, in4, in3, in2, in3); - in0 -= in3; - in1 += in2; - - SLLI_4V(in0, in1, in2, in3, 2); - - TRANSPOSE4x4_SH_SH(in0, in3, in1, in2, in0, in3, in1, in2); - - ST4x2_UB(in0, output, 4); - ST4x2_UB(in3, output + 4, 4); - ST4x2_UB(in1, output + 8, 4); - ST4x2_UB(in2, output + 12, 4); -} diff --git a/third_party/aom/av1/encoder/mips/msa/temporal_filter_msa.c b/third_party/aom/av1/encoder/mips/msa/temporal_filter_msa.c deleted file mode 100644 index 531ae090a..000000000 --- a/third_party/aom/av1/encoder/mips/msa/temporal_filter_msa.c +++ /dev/null @@ -1,285 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include "config/av1_rtcd.h" - -#include "aom_dsp/mips/macros_msa.h" - -static void temporal_filter_apply_8size_msa(uint8_t *frm1_ptr, uint32_t stride, - uint8_t *frm2_ptr, int32_t filt_sth, - int32_t filt_wgt, uint32_t *acc, - uint16_t *cnt) { - uint32_t row; - uint64_t f0, f1, f2, f3; - v16i8 frm2, frm1 = { 0 }; - v16i8 frm4, frm3 = { 0 }; - v16u8 frm_r, frm_l; - v8i16 frm2_r, frm2_l; - v8i16 diff0, diff1, mod0_h, mod1_h; - v4i32 cnst3, cnst16, filt_wt, strength; - v4i32 mod0_w, mod1_w, mod2_w, mod3_w; - v4i32 diff0_r, diff0_l, diff1_r, diff1_l; - v4i32 frm2_rr, frm2_rl, frm2_lr, frm2_ll; - v4i32 acc0, acc1, acc2, acc3; - v8i16 cnt0, cnt1; - - filt_wt = __msa_fill_w(filt_wgt); - strength = __msa_fill_w(filt_sth); - cnst3 = __msa_ldi_w(3); - cnst16 = __msa_ldi_w(16); - - for (row = 2; row--;) { - LD4(frm1_ptr, stride, f0, f1, f2, f3); - frm1_ptr += (4 * stride); - - LD_SB2(frm2_ptr, 16, frm2, frm4); - frm2_ptr += 32; - - LD_SW2(acc, 4, acc0, acc1); - LD_SW2(acc + 8, 4, acc2, acc3); - LD_SH2(cnt, 8, cnt0, cnt1); - - INSERT_D2_SB(f0, f1, frm1); - INSERT_D2_SB(f2, f3, frm3); - ILVRL_B2_UB(frm1, frm2, frm_r, frm_l); - HSUB_UB2_SH(frm_r, frm_l, diff0, diff1); - UNPCK_SH_SW(diff0, diff0_r, diff0_l); - UNPCK_SH_SW(diff1, diff1_r, diff1_l); - MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l, - mod0_w, mod1_w, mod2_w, mod3_w); - MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w, - mod1_w, mod2_w, mod3_w); - SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength); - - diff0_r = (mod0_w < cnst16); - diff0_l = (mod1_w < cnst16); - diff1_r = (mod2_w < cnst16); - diff1_l = (mod3_w < cnst16); - - SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w, - mod1_w, mod2_w, mod3_w); - - mod0_w = diff0_r & mod0_w; - mod1_w = diff0_l & mod1_w; - mod2_w = diff1_r & mod2_w; - mod3_w = diff1_l & mod3_w; - - MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt, - mod0_w, mod1_w, mod2_w, mod3_w); - 
PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h); - ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h); - ST_SH2(mod0_h, mod1_h, cnt, 8); - cnt += 16; - - UNPCK_UB_SH(frm2, frm2_r, frm2_l); - UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl); - UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll); - MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll, - mod0_w, mod1_w, mod2_w, mod3_w); - ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w, - mod2_w, mod3_w); - - ST_SW2(mod0_w, mod1_w, acc, 4); - acc += 8; - ST_SW2(mod2_w, mod3_w, acc, 4); - acc += 8; - - LD_SW2(acc, 4, acc0, acc1); - LD_SW2(acc + 8, 4, acc2, acc3); - LD_SH2(cnt, 8, cnt0, cnt1); - - ILVRL_B2_UB(frm3, frm4, frm_r, frm_l); - HSUB_UB2_SH(frm_r, frm_l, diff0, diff1); - UNPCK_SH_SW(diff0, diff0_r, diff0_l); - UNPCK_SH_SW(diff1, diff1_r, diff1_l); - MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l, - mod0_w, mod1_w, mod2_w, mod3_w); - MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w, - mod1_w, mod2_w, mod3_w); - SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength); - - diff0_r = (mod0_w < cnst16); - diff0_l = (mod1_w < cnst16); - diff1_r = (mod2_w < cnst16); - diff1_l = (mod3_w < cnst16); - - SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w, - mod1_w, mod2_w, mod3_w); - - mod0_w = diff0_r & mod0_w; - mod1_w = diff0_l & mod1_w; - mod2_w = diff1_r & mod2_w; - mod3_w = diff1_l & mod3_w; - - MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt, - mod0_w, mod1_w, mod2_w, mod3_w); - PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h); - ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h); - ST_SH2(mod0_h, mod1_h, cnt, 8); - cnt += 16; - UNPCK_UB_SH(frm4, frm2_r, frm2_l); - UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl); - UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll); - MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll, - mod0_w, mod1_w, mod2_w, mod3_w); - ADD4(mod0_w, acc0, 
mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w, - mod2_w, mod3_w); - - ST_SW2(mod0_w, mod1_w, acc, 4); - acc += 8; - ST_SW2(mod2_w, mod3_w, acc, 4); - acc += 8; - } -} - -static void temporal_filter_apply_16size_msa(uint8_t *frm1_ptr, uint32_t stride, - uint8_t *frm2_ptr, - int32_t filt_sth, int32_t filt_wgt, - uint32_t *acc, uint16_t *cnt) { - uint32_t row; - v16i8 frm1, frm2, frm3, frm4; - v16u8 frm_r, frm_l; - v16i8 zero = { 0 }; - v8u16 frm2_r, frm2_l; - v8i16 diff0, diff1, mod0_h, mod1_h; - v4i32 cnst3, cnst16, filt_wt, strength; - v4i32 mod0_w, mod1_w, mod2_w, mod3_w; - v4i32 diff0_r, diff0_l, diff1_r, diff1_l; - v4i32 frm2_rr, frm2_rl, frm2_lr, frm2_ll; - v4i32 acc0, acc1, acc2, acc3; - v8i16 cnt0, cnt1; - - filt_wt = __msa_fill_w(filt_wgt); - strength = __msa_fill_w(filt_sth); - cnst3 = __msa_ldi_w(3); - cnst16 = __msa_ldi_w(16); - - for (row = 8; row--;) { - LD_SB2(frm1_ptr, stride, frm1, frm3); - frm1_ptr += stride; - - LD_SB2(frm2_ptr, 16, frm2, frm4); - frm2_ptr += 16; - - LD_SW2(acc, 4, acc0, acc1); - LD_SW2(acc, 4, acc2, acc3); - LD_SH2(cnt, 8, cnt0, cnt1); - - ILVRL_B2_UB(frm1, frm2, frm_r, frm_l); - HSUB_UB2_SH(frm_r, frm_l, diff0, diff1); - UNPCK_SH_SW(diff0, diff0_r, diff0_l); - UNPCK_SH_SW(diff1, diff1_r, diff1_l); - MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l, - mod0_w, mod1_w, mod2_w, mod3_w); - MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w, - mod1_w, mod2_w, mod3_w); - SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength); - - diff0_r = (mod0_w < cnst16); - diff0_l = (mod1_w < cnst16); - diff1_r = (mod2_w < cnst16); - diff1_l = (mod3_w < cnst16); - - SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w, - mod1_w, mod2_w, mod3_w); - - mod0_w = diff0_r & mod0_w; - mod1_w = diff0_l & mod1_w; - mod2_w = diff1_r & mod2_w; - mod3_w = diff1_l & mod3_w; - - MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt, - mod0_w, mod1_w, mod2_w, mod3_w); 
- PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h); - ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h); - ST_SH2(mod0_h, mod1_h, cnt, 8); - cnt += 16; - - ILVRL_B2_UH(zero, frm2, frm2_r, frm2_l); - UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl); - UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll); - MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll, - mod0_w, mod1_w, mod2_w, mod3_w); - ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w, - mod2_w, mod3_w); - - ST_SW2(mod0_w, mod1_w, acc, 4); - acc += 8; - ST_SW2(mod2_w, mod3_w, acc, 4); - acc += 8; - - LD_SW2(acc, 4, acc0, acc1); - LD_SW2(acc + 8, 4, acc2, acc3); - LD_SH2(cnt, 8, cnt0, cnt1); - - ILVRL_B2_UB(frm3, frm4, frm_r, frm_l); - HSUB_UB2_SH(frm_r, frm_l, diff0, diff1); - UNPCK_SH_SW(diff0, diff0_r, diff0_l); - UNPCK_SH_SW(diff1, diff1_r, diff1_l); - MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l, - mod0_w, mod1_w, mod2_w, mod3_w); - MUL4(mod0_w, cnst3, mod1_w, cnst3, mod2_w, cnst3, mod3_w, cnst3, mod0_w, - mod1_w, mod2_w, mod3_w); - SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength); - - diff0_r = (mod0_w < cnst16); - diff0_l = (mod1_w < cnst16); - diff1_r = (mod2_w < cnst16); - diff1_l = (mod3_w < cnst16); - - SUB4(cnst16, mod0_w, cnst16, mod1_w, cnst16, mod2_w, cnst16, mod3_w, mod0_w, - mod1_w, mod2_w, mod3_w); - - mod0_w = diff0_r & mod0_w; - mod1_w = diff0_l & mod1_w; - mod2_w = diff1_r & mod2_w; - mod3_w = diff1_l & mod3_w; - - MUL4(mod0_w, filt_wt, mod1_w, filt_wt, mod2_w, filt_wt, mod3_w, filt_wt, - mod0_w, mod1_w, mod2_w, mod3_w); - PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h); - ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h); - ST_SH2(mod0_h, mod1_h, cnt, 8); - cnt += 16; - - ILVRL_B2_UH(zero, frm4, frm2_r, frm2_l); - UNPCK_SH_SW(frm2_r, frm2_rr, frm2_rl); - UNPCK_SH_SW(frm2_l, frm2_lr, frm2_ll); - MUL4(mod0_w, frm2_rr, mod1_w, frm2_rl, mod2_w, frm2_lr, mod3_w, frm2_ll, - mod0_w, mod1_w, mod2_w, mod3_w); - 
ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w, - mod2_w, mod3_w); - ST_SW2(mod0_w, mod1_w, acc, 4); - acc += 8; - ST_SW2(mod2_w, mod3_w, acc, 4); - acc += 8; - - frm1_ptr += stride; - frm2_ptr += 16; - } -} - -void av1_temporal_filter_apply_msa(uint8_t *frame1_ptr, uint32_t stride, - uint8_t *frame2_ptr, uint32_t blk_w, - uint32_t blk_h, int32_t strength, - int32_t filt_wgt, uint32_t *accu, - uint16_t *cnt) { - if (8 == (blk_w * blk_h)) { - temporal_filter_apply_8size_msa(frame1_ptr, stride, frame2_ptr, strength, - filt_wgt, accu, cnt); - } else if (16 == (blk_w * blk_h)) { - temporal_filter_apply_16size_msa(frame1_ptr, stride, frame2_ptr, strength, - filt_wgt, accu, cnt); - } else { - av1_temporal_filter_apply_c(frame1_ptr, stride, frame2_ptr, blk_w, blk_h, - strength, filt_wgt, accu, cnt); - } -} diff --git a/third_party/aom/av1/encoder/ml.c b/third_party/aom/av1/encoder/ml.c deleted file mode 100644 index d21def43a..000000000 --- a/third_party/aom/av1/encoder/ml.c +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include -#include - -#include "aom_dsp/aom_dsp_common.h" -#include "av1/encoder/ml.h" - -void av1_nn_predict(const float *features, const NN_CONFIG *nn_config, - float *output) { - int num_input_nodes = nn_config->num_inputs; - int buf_index = 0; - float buf[2][NN_MAX_NODES_PER_LAYER]; - const float *input_nodes = features; - - // Propagate hidden layers. 
- const int num_layers = nn_config->num_hidden_layers; - assert(num_layers <= NN_MAX_HIDDEN_LAYERS); - for (int layer = 0; layer < num_layers; ++layer) { - const float *weights = nn_config->weights[layer]; - const float *bias = nn_config->bias[layer]; - float *output_nodes = buf[buf_index]; - const int num_output_nodes = nn_config->num_hidden_nodes[layer]; - assert(num_output_nodes < NN_MAX_NODES_PER_LAYER); - for (int node = 0; node < num_output_nodes; ++node) { - float val = 0.0f; - for (int i = 0; i < num_input_nodes; ++i) - val += weights[i] * input_nodes[i]; - val += bias[node]; - // ReLU as activation function. - val = val > 0.0f ? val : 0.0f; // Could use AOMMAX(). - output_nodes[node] = val; - weights += num_input_nodes; - } - num_input_nodes = num_output_nodes; - input_nodes = output_nodes; - buf_index = 1 - buf_index; - } - - // Final output layer. - const float *weights = nn_config->weights[num_layers]; - for (int node = 0; node < nn_config->num_outputs; ++node) { - const float *bias = nn_config->bias[num_layers]; - float val = 0.0f; - for (int i = 0; i < num_input_nodes; ++i) - val += weights[i] * input_nodes[i]; - output[node] = val + bias[node]; - weights += num_input_nodes; - } -} - -void av1_nn_softmax(const float *input, float *output, int n) { - // Softmax function is invariant to adding the same constant - // to all input values, so we subtract the maximum input to avoid - // possible overflow. - float max_inp = input[0]; - for (int i = 1; i < n; i++) max_inp = AOMMAX(max_inp, input[i]); - float sum_out = 0.0f; - for (int i = 0; i < n; i++) { - output[i] = (float)exp(input[i] - max_inp); - sum_out += output[i]; - } - for (int i = 0; i < n; i++) output[i] /= sum_out; -} diff --git a/third_party/aom/av1/encoder/ml.h b/third_party/aom/av1/encoder/ml.h deleted file mode 100644 index cb8ef2871..000000000 --- a/third_party/aom/av1/encoder/ml.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. 
All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_ENCODER_ML_H_ -#define AOM_AV1_ENCODER_ML_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#define NN_MAX_HIDDEN_LAYERS 10 -#define NN_MAX_NODES_PER_LAYER 128 - -typedef struct { - int num_inputs; // Number of input nodes, i.e. features. - int num_outputs; // Number of output nodes. - int num_hidden_layers; // Number of hidden layers, maximum 10. - // Number of nodes for each hidden layer. - int num_hidden_nodes[NN_MAX_HIDDEN_LAYERS]; - // Weight parameters, indexed by layer. - const float *weights[NN_MAX_HIDDEN_LAYERS + 1]; - // Bias parameters, indexed by layer. - const float *bias[NN_MAX_HIDDEN_LAYERS + 1]; -} NN_CONFIG; - -// Calculate prediction based on the given input features and neural net config. -// Assume there are no more than NN_MAX_NODES_PER_LAYER nodes in each hidden -// layer. 
-void av1_nn_predict(const float *features, const NN_CONFIG *nn_config, - float *output); - -// Applies the softmax normalization function to the input -// to get a valid probability distribution in the output: -// output[i] = exp(input[i]) / sum_{k \in [0,n)}(exp(input[k])) -void av1_nn_softmax(const float *input, float *output, int n); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_ML_H_ diff --git a/third_party/aom/av1/encoder/palette.c b/third_party/aom/av1/encoder/palette.c deleted file mode 100644 index e61cd02ce..000000000 --- a/third_party/aom/av1/encoder/palette.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include -#include - -#include "av1/encoder/cost.h" -#include "av1/encoder/palette.h" -#include "av1/encoder/random.h" - -#define AV1_K_MEANS_DIM 1 -#include "av1/encoder/k_means_template.h" -#undef AV1_K_MEANS_DIM -#define AV1_K_MEANS_DIM 2 -#include "av1/encoder/k_means_template.h" -#undef AV1_K_MEANS_DIM - -static int int_comparer(const void *a, const void *b) { - return (*(int *)a - *(int *)b); -} - -int av1_remove_duplicates(int *centroids, int num_centroids) { - int num_unique; // number of unique centroids - int i; - qsort(centroids, num_centroids, sizeof(*centroids), int_comparer); - // Remove duplicates. 
- num_unique = 1; - for (i = 1; i < num_centroids; ++i) { - if (centroids[i] != centroids[i - 1]) { // found a new unique centroid - centroids[num_unique++] = centroids[i]; - } - } - return num_unique; -} - -static int delta_encode_cost(const int *colors, int num, int bit_depth, - int min_val) { - if (num <= 0) return 0; - int bits_cost = bit_depth; - if (num == 1) return bits_cost; - bits_cost += 2; - int max_delta = 0; - int deltas[PALETTE_MAX_SIZE]; - const int min_bits = bit_depth - 3; - for (int i = 1; i < num; ++i) { - const int delta = colors[i] - colors[i - 1]; - deltas[i - 1] = delta; - assert(delta >= min_val); - if (delta > max_delta) max_delta = delta; - } - int bits_per_delta = AOMMAX(av1_ceil_log2(max_delta + 1 - min_val), min_bits); - assert(bits_per_delta <= bit_depth); - int range = (1 << bit_depth) - colors[0] - min_val; - for (int i = 0; i < num - 1; ++i) { - bits_cost += bits_per_delta; - range -= deltas[i]; - bits_per_delta = AOMMIN(bits_per_delta, av1_ceil_log2(range)); - } - return bits_cost; -} - -int av1_index_color_cache(const uint16_t *color_cache, int n_cache, - const uint16_t *colors, int n_colors, - uint8_t *cache_color_found, int *out_cache_colors) { - if (n_cache <= 0) { - for (int i = 0; i < n_colors; ++i) out_cache_colors[i] = colors[i]; - return n_colors; - } - memset(cache_color_found, 0, n_cache * sizeof(*cache_color_found)); - int n_in_cache = 0; - int in_cache_flags[PALETTE_MAX_SIZE]; - memset(in_cache_flags, 0, sizeof(in_cache_flags)); - for (int i = 0; i < n_cache && n_in_cache < n_colors; ++i) { - for (int j = 0; j < n_colors; ++j) { - if (colors[j] == color_cache[i]) { - in_cache_flags[j] = 1; - cache_color_found[i] = 1; - ++n_in_cache; - break; - } - } - } - int j = 0; - for (int i = 0; i < n_colors; ++i) - if (!in_cache_flags[i]) out_cache_colors[j++] = colors[i]; - assert(j == n_colors - n_in_cache); - return j; -} - -int av1_get_palette_delta_bits_v(const PALETTE_MODE_INFO *const pmi, - int bit_depth, int *zero_count, 
- int *min_bits) { - const int n = pmi->palette_size[1]; - const int max_val = 1 << bit_depth; - int max_d = 0; - *min_bits = bit_depth - 4; - *zero_count = 0; - for (int i = 1; i < n; ++i) { - const int delta = pmi->palette_colors[2 * PALETTE_MAX_SIZE + i] - - pmi->palette_colors[2 * PALETTE_MAX_SIZE + i - 1]; - const int v = abs(delta); - const int d = AOMMIN(v, max_val - v); - if (d > max_d) max_d = d; - if (d == 0) ++(*zero_count); - } - return AOMMAX(av1_ceil_log2(max_d + 1), *min_bits); -} - -int av1_palette_color_cost_y(const PALETTE_MODE_INFO *const pmi, - uint16_t *color_cache, int n_cache, - int bit_depth) { - const int n = pmi->palette_size[0]; - int out_cache_colors[PALETTE_MAX_SIZE]; - uint8_t cache_color_found[2 * PALETTE_MAX_SIZE]; - const int n_out_cache = - av1_index_color_cache(color_cache, n_cache, pmi->palette_colors, n, - cache_color_found, out_cache_colors); - const int total_bits = - n_cache + delta_encode_cost(out_cache_colors, n_out_cache, bit_depth, 1); - return av1_cost_literal(total_bits); -} - -int av1_palette_color_cost_uv(const PALETTE_MODE_INFO *const pmi, - uint16_t *color_cache, int n_cache, - int bit_depth) { - const int n = pmi->palette_size[1]; - int total_bits = 0; - // U channel palette color cost. - int out_cache_colors[PALETTE_MAX_SIZE]; - uint8_t cache_color_found[2 * PALETTE_MAX_SIZE]; - const int n_out_cache = av1_index_color_cache( - color_cache, n_cache, pmi->palette_colors + PALETTE_MAX_SIZE, n, - cache_color_found, out_cache_colors); - total_bits += - n_cache + delta_encode_cost(out_cache_colors, n_out_cache, bit_depth, 0); - - // V channel palette color cost. 
- int zero_count = 0, min_bits_v = 0; - const int bits_v = - av1_get_palette_delta_bits_v(pmi, bit_depth, &zero_count, &min_bits_v); - const int bits_using_delta = - 2 + bit_depth + (bits_v + 1) * (n - 1) - zero_count; - const int bits_using_raw = bit_depth * n; - total_bits += 1 + AOMMIN(bits_using_delta, bits_using_raw); - return av1_cost_literal(total_bits); -} diff --git a/third_party/aom/av1/encoder/palette.h b/third_party/aom/av1/encoder/palette.h deleted file mode 100644 index 8b88c4755..000000000 --- a/third_party/aom/av1/encoder/palette.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_ENCODER_PALETTE_H_ -#define AOM_AV1_ENCODER_PALETTE_H_ - -#include "av1/common/blockd.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define AV1_K_MEANS_RENAME(func, dim) func##_dim##dim - -void AV1_K_MEANS_RENAME(av1_calc_indices, 1)(const int *data, - const int *centroids, - uint8_t *indices, int n, int k); -void AV1_K_MEANS_RENAME(av1_calc_indices, 2)(const int *data, - const int *centroids, - uint8_t *indices, int n, int k); -void AV1_K_MEANS_RENAME(av1_k_means, 1)(const int *data, int *centroids, - uint8_t *indices, int n, int k, - int max_itr); -void AV1_K_MEANS_RENAME(av1_k_means, 2)(const int *data, int *centroids, - uint8_t *indices, int n, int k, - int max_itr); - -// Given 'n' 'data' points and 'k' 'centroids' each of dimension 'dim', -// calculate the centroid 'indices' for the data points. 
-static INLINE void av1_calc_indices(const int *data, const int *centroids, - uint8_t *indices, int n, int k, int dim) { - if (dim == 1) { - AV1_K_MEANS_RENAME(av1_calc_indices, 1)(data, centroids, indices, n, k); - } else if (dim == 2) { - AV1_K_MEANS_RENAME(av1_calc_indices, 2)(data, centroids, indices, n, k); - } else { - assert(0 && "Untemplated k means dimension"); - } -} - -// Given 'n' 'data' points and an initial guess of 'k' 'centroids' each of -// dimension 'dim', runs up to 'max_itr' iterations of k-means algorithm to get -// updated 'centroids' and the centroid 'indices' for elements in 'data'. -// Note: the output centroids are rounded off to nearest integers. -static INLINE void av1_k_means(const int *data, int *centroids, - uint8_t *indices, int n, int k, int dim, - int max_itr) { - if (dim == 1) { - AV1_K_MEANS_RENAME(av1_k_means, 1)(data, centroids, indices, n, k, max_itr); - } else if (dim == 2) { - AV1_K_MEANS_RENAME(av1_k_means, 2)(data, centroids, indices, n, k, max_itr); - } else { - assert(0 && "Untemplated k means dimension"); - } -} - -// Given a list of centroids, returns the unique number of centroids 'k', and -// puts these unique centroids in first 'k' indices of 'centroids' array. -// Ideally, the centroids should be rounded to integers before calling this -// method. -int av1_remove_duplicates(int *centroids, int num_centroids); - -// Given a color cache and a set of base colors, find if each cache color is -// present in the base colors, record the binary results in "cache_color_found". -// Record the colors that are not in the color cache in "out_cache_colors". -int av1_index_color_cache(const uint16_t *color_cache, int n_cache, - const uint16_t *colors, int n_colors, - uint8_t *cache_color_found, int *out_cache_colors); - -// Return the number of bits used to transmit each v palette color delta; -// assign zero_count with the number of deltas being 0. 
-int av1_get_palette_delta_bits_v(const PALETTE_MODE_INFO *const pmi, - int bit_depth, int *zero_count, int *min_bits); - -// Return the rate cost for transmitting luma palette color values. -int av1_palette_color_cost_y(const PALETTE_MODE_INFO *const pmi, - uint16_t *color_cache, int n_cache, int bit_depth); - -// Return the rate cost for transmitting chroma palette color values. -int av1_palette_color_cost_uv(const PALETTE_MODE_INFO *const pmi, - uint16_t *color_cache, int n_cache, - int bit_depth); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_PALETTE_H_ diff --git a/third_party/aom/av1/encoder/partition_model_weights.h b/third_party/aom/av1/encoder/partition_model_weights.h deleted file mode 100644 index 437ea43f9..000000000 --- a/third_party/aom/av1/encoder/partition_model_weights.h +++ /dev/null @@ -1,2448 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_ENCODER_PARTITION_MODEL_WEIGHTS_H_ -#define AOM_AV1_ENCODER_PARTITION_MODEL_WEIGHTS_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "av1/encoder/ml.h" - -#define FEATURE_SIZE 10 -#define LABEL_SIZE 16 -// nn model for ab partition pruning, 128x128. 
-static const float av1_ab_partition_nn_weights_128_layer0[FEATURE_SIZE * 64] = { - -0.715251f, -0.015767f, -0.667353f, -0.345255f, 0.177887f, -0.469759f, - 0.426152f, 0.489798f, 0.469865f, 0.773821f, 0.088517f, 0.074585f, - 0.838754f, 0.048449f, -0.007584f, 0.638968f, 0.233305f, -0.319236f, - -0.257124f, -0.170869f, 0.137180f, 0.114852f, -0.721241f, -0.947962f, - -0.411298f, 0.494306f, -0.060435f, -0.648421f, -0.126624f, 0.072686f, - -0.143904f, -0.115839f, -0.175527f, -0.117728f, 0.040686f, -0.189925f, - 0.134361f, -0.258070f, -0.177558f, 0.158049f, 0.168668f, -0.062919f, - 0.341986f, 0.038100f, -0.435577f, -0.321255f, 0.203213f, 0.213061f, - 0.533304f, 0.359296f, -0.079558f, 0.004637f, 0.663904f, 0.043779f, - 0.383018f, 1.136559f, -0.084155f, 0.333057f, -0.199011f, 0.152059f, - -0.078419f, -0.167752f, -0.093651f, 0.083171f, -0.190143f, 0.086195f, - -0.280632f, -0.160663f, -0.017298f, 0.122628f, -0.138116f, 0.062927f, - 0.222462f, 0.626979f, 0.426928f, 0.117170f, -0.240457f, 0.053750f, - 0.038017f, 0.007359f, -0.017595f, 0.101407f, 0.332891f, 0.074933f, - 0.306498f, 0.219380f, -0.151638f, -0.247976f, 0.343405f, 0.121256f, - 0.049173f, 0.171474f, -0.139608f, -1.016599f, -0.345553f, -0.901138f, - 0.243401f, 0.059928f, -0.089396f, -0.195565f, 0.364705f, -0.020400f, - -1.383672f, 0.413018f, 0.536950f, -0.020904f, -1.335306f, -0.732290f, - 0.102885f, 0.315290f, -0.208521f, -0.081811f, 0.182300f, 0.125712f, - -0.593833f, -0.220639f, -0.314155f, 0.188327f, 0.118503f, 0.524427f, - -1.083859f, -1.130640f, 0.390352f, -0.045591f, 0.113160f, -0.009149f, - -0.096183f, 0.115829f, 0.377752f, 0.318396f, -0.591983f, 0.004797f, - -0.497377f, -0.342248f, 0.079546f, -0.025249f, -0.295972f, 0.615501f, - -0.464372f, 0.418315f, -0.173556f, 0.105217f, 0.298073f, 0.082478f, - 0.033223f, 0.977341f, -0.372982f, -0.052337f, 0.154124f, 0.396787f, - 0.536654f, -0.139061f, -0.223702f, 0.229666f, -0.846766f, 0.107723f, - 0.563839f, -0.483141f, 0.304813f, -0.765283f, 0.070964f, 0.151101f, - 
0.275188f, 0.490303f, 1.175892f, 0.085377f, -0.191200f, 0.544532f, - -0.365075f, 0.167546f, 0.052183f, -0.220529f, -0.212227f, -0.144988f, - -0.273356f, -0.062023f, 0.103993f, -0.238493f, -0.161204f, -0.054611f, - -0.166672f, 0.128327f, 0.461751f, -0.545822f, 0.739798f, 0.594386f, - -0.163192f, -0.332501f, 0.363834f, -0.065043f, 0.474812f, -0.138811f, - 0.170924f, -0.778142f, -0.316474f, -0.508065f, -0.039986f, -0.478001f, - 0.340591f, 0.041783f, 0.055419f, 0.015155f, -0.981830f, -1.355237f, - 0.347516f, 1.155327f, 0.081319f, 0.274163f, -0.327230f, -0.113478f, - 0.556552f, -0.055986f, 0.217318f, -0.445351f, 0.325759f, 0.526547f, - -0.657434f, -0.572214f, -0.037087f, 0.081384f, 0.064518f, 0.014892f, - 0.215279f, 1.834504f, -0.242107f, 0.079810f, 0.129558f, 0.079588f, - -0.035189f, -0.221745f, -0.163414f, 0.043978f, -1.028662f, -0.623609f, - 1.130336f, 0.664661f, -0.063975f, -0.415863f, 0.018581f, 0.157758f, - 0.200570f, 0.063420f, 0.901039f, -0.746286f, 0.196230f, -0.290592f, - 0.042373f, -0.502500f, 0.183638f, 0.103394f, -0.298858f, 0.145436f, - 0.196916f, 0.108319f, -0.448572f, -0.881385f, 0.302497f, 0.121679f, - -0.021327f, 0.025150f, 0.481306f, -0.359634f, 0.350257f, -0.228647f, - -0.669860f, 0.260025f, -0.034182f, 0.619247f, -0.158826f, -0.405864f, - 0.674112f, -0.027885f, -0.325274f, -0.241492f, 0.036024f, -0.437685f, - -0.091458f, -0.109295f, -0.350676f, 0.044706f, 0.297059f, 0.016290f, - 1.121203f, 1.289062f, -1.299476f, -1.129221f, 0.103752f, 0.131302f, - -0.263265f, 0.222155f, -0.229908f, 0.013922f, -0.226001f, -0.248383f, - -0.004415f, -0.020958f, 0.055634f, 0.086200f, 0.114556f, -0.184061f, - -0.096210f, -0.146466f, -0.249618f, -0.195998f, 0.088758f, 0.023781f, - -0.264460f, 0.157026f, -0.235228f, -0.102564f, 0.043463f, -0.187823f, - -0.257500f, -0.199049f, -0.242210f, 0.030448f, 0.221604f, 0.151804f, - -0.100404f, -0.073931f, 0.144749f, -0.001572f, -1.438079f, -0.233716f, - 0.733422f, 1.727080f, -0.036397f, 0.027551f, 0.425321f, 0.085703f, - 0.031186f, 
0.032333f, -0.675130f, 1.437733f, -0.202392f, -0.525003f, - 0.087048f, 0.328194f, -0.079989f, -0.391088f, -0.238732f, -0.120660f, - -0.139600f, 0.154665f, 0.026202f, -0.233501f, -0.009046f, -0.149187f, - -0.199646f, 0.115375f, 0.209762f, -0.014875f, 0.124038f, -0.119985f, - 1.079625f, -0.461513f, 0.614114f, 0.021003f, 0.439449f, -0.824834f, - -0.299701f, 0.193817f, -0.870551f, -1.262313f, -0.079517f, 0.341570f, - 0.305310f, -0.089721f, -0.317314f, -0.075631f, 0.127172f, -0.208635f, - 1.191922f, 0.163141f, 0.564285f, 0.286352f, 0.480865f, 0.173094f, - -0.094034f, -0.071339f, -0.328992f, -0.006382f, 0.314705f, 0.090258f, - -0.016099f, 0.193230f, 0.188061f, 0.398144f, 0.722781f, 0.769949f, - 0.025442f, -0.162016f, 0.070192f, -0.056946f, -0.100957f, -0.219934f, - -0.203492f, -0.015454f, -0.013272f, -0.098008f, 0.051707f, -0.017493f, - 0.527446f, 0.083605f, 0.588318f, 0.878215f, 0.028747f, -0.146479f, - -0.345170f, -0.136059f, -0.152005f, -0.203634f, 0.232702f, -0.101340f, - -0.027733f, -0.282611f, 0.265366f, 0.082362f, -0.265420f, -0.131124f, - 0.166303f, 0.040194f, -0.100710f, 0.579151f, -0.530136f, 0.163422f, - -0.998821f, -1.565311f, -1.774785f, -2.493372f, 0.116970f, -0.090302f, - 1.723272f, 0.552370f, -0.295954f, -0.439095f, -0.266730f, 0.027936f, - 0.539616f, -0.234902f, -0.167601f, -0.149877f, -0.242983f, 0.122353f, - -0.121620f, -0.205517f, -0.180144f, -0.264208f, 0.151500f, -0.159378f, - 0.029145f, -0.050892f, -0.223407f, -0.246239f, 0.043152f, -0.018460f, - 0.169972f, -0.187769f, -0.034670f, -0.238330f, 0.288070f, -0.093243f, - -0.437105f, -0.573376f, 0.660073f, 0.285727f, 0.408470f, 0.158475f, - 0.032699f, 0.056280f, -0.237176f, -0.083003f, 0.105598f, -0.169522f, - -0.260420f, -0.121100f, -0.173983f, -0.195693f, -0.232028f, 0.224940f, - 0.029124f, 0.009580f, -0.252034f, 0.103087f, 1.156561f, 0.603848f, - -0.562805f, -1.652742f, -0.568288f, -1.829395f, 0.046169f, 0.076095f, - 1.490819f, 0.415893f, -0.277788f, -0.115787f, 0.093750f, 0.270726f, - -0.395983f, 
-0.353742f, 0.034605f, 0.005342f, 0.184537f, 0.086445f, - 0.156417f, 1.476367f, 0.122587f, 0.002145f, 0.431057f, -0.381184f, - -1.646457f, -0.014009f, -0.671224f, 0.193726f, -0.019247f, -0.031267f, - -0.046208f, 0.298733f, 0.064734f, 0.616984f, 0.039381f, 0.182722f, - -0.116670f, 0.233093f, -1.214374f, -0.817970f, -0.064394f, -0.584783f, - 0.077697f, -0.266720f, 0.130875f, -0.235295f, -0.265754f, -0.159999f, - -0.250114f, -0.183017f, 0.194403f, -0.105808f, -0.169215f, -0.240866f, - -0.026662f, -0.045123f, -0.036175f, -0.167471f, -0.192908f, -0.232602f, - -0.267036f, -0.112500f, -0.257944f, -0.111909f, -0.802226f, -0.008800f, - 0.881460f, -0.678603f, 0.008666f, -0.252053f, -0.341035f, -0.175290f, - 0.183012f, 0.385991f, 0.079888f, -0.014039f, -0.148653f, 0.671778f, - -0.130219f, 1.086467f, 0.129267f, -0.040400f, -0.201221f, -0.077005f, - 0.015890f, 0.000781f, 0.137764f, 1.389546f, 0.172152f, 0.047279f, - -0.042783f, 0.127740f, 0.141467f, -0.335738f, -1.396392f, 0.031496f, - 0.357385f, 0.343602f, -0.714553f, 0.311014f, 0.132845f, 0.061149f, - 0.006796f, 0.568106f, -0.255949f, 0.104134f, -0.993447f, 0.298135f, - -0.406590f, -0.049228f, -0.578570f, -0.188561f, -0.107046f, 0.374095f, - 0.068481f, 0.036240f, -0.495801f, 0.180574f, -0.766129f, 0.886967f, - -0.568868f, -0.936062f, -0.418886f, -0.058735f, -0.511964f, -0.438596f, - 0.019016f, -0.015837f, 0.600197f, 0.429773f, 0.315026f, 0.319667f, - 0.214617f, -0.017316f, 0.270257f, -0.040524f, 0.695803f, -0.015223f, - -1.554965f, 0.356997f, -1.472428f, 0.024637f, -0.562958f, 0.870351f, - 0.193635f, 0.036063f, 0.328638f, 0.200274f, -1.634707f, 0.110534f, - 0.420104f, -0.072042f, -0.006404f, 0.171680f, -}; - -static const float av1_ab_partition_nn_bias_128_layer0[64] = { - 0.643147f, -1.348826f, 0.431627f, 0.000000f, 0.102717f, -0.772628f, - -0.034351f, -0.761977f, -0.638397f, 0.541969f, -0.391311f, 0.563076f, - 0.148553f, 0.267217f, -0.788092f, 0.544573f, -0.546280f, 0.000000f, - -0.446945f, 0.127732f, 0.270624f, -0.219435f, 
-1.220203f, 0.324584f, - 0.110885f, 0.276547f, 0.179726f, -0.375160f, 0.026401f, -0.032595f, - 0.000000f, -0.047932f, -0.648602f, -0.512637f, -0.031661f, -0.236761f, - 0.476453f, -0.028021f, -0.013673f, -0.015578f, -0.920077f, 0.000000f, - 0.915351f, -0.209962f, 0.000000f, -0.025731f, 0.218288f, 0.000000f, - 0.047726f, -0.813077f, -1.263281f, 0.239087f, 0.278614f, -0.030753f, - 0.000000f, 0.346744f, -0.948543f, -1.174211f, 0.216377f, 0.498913f, - 0.853918f, 0.002504f, -0.190403f, 0.452050f, -}; - -static const float av1_ab_partition_nn_weights_128_layer1[64 * LABEL_SIZE] = { - 0.179769f, 1.499417f, -0.445135f, -0.142278f, -0.337661f, 0.682064f, - -0.203213f, 0.302171f, 0.226877f, -0.422169f, 1.687586f, 0.783773f, - 0.220995f, 0.253482f, 0.370435f, -1.342775f, 0.337229f, -0.271473f, - 0.291796f, 1.362227f, -1.751397f, -0.086178f, 0.725496f, -0.118597f, - 0.227963f, -0.501577f, 0.223849f, -0.122421f, -0.123437f, -0.051045f, - -0.020115f, 0.212711f, 0.246025f, 0.088120f, -0.168995f, 1.740190f, - -0.195098f, 0.680339f, -0.589572f, -0.075244f, 0.878766f, 0.064092f, - -3.548527f, 0.001660f, 0.107926f, -0.169501f, -0.455212f, 0.123045f, - -1.836998f, 0.330365f, 1.301475f, 0.454761f, -0.576552f, -0.190761f, - 0.208459f, 0.618483f, 1.383364f, 0.970718f, 0.390174f, 0.406252f, - -0.564519f, -0.312062f, 1.345712f, -0.151873f, 0.109290f, 0.408847f, - 0.391243f, 0.152024f, 0.181764f, -0.036263f, -0.160466f, 0.153595f, - 0.049163f, -0.753012f, -1.804062f, 0.347475f, -2.746580f, 0.575618f, - 0.261799f, 0.210505f, -0.302054f, -0.109872f, 0.199506f, -1.182971f, - 0.723668f, 0.177758f, -0.338202f, 0.254396f, -0.220023f, 0.043504f, - 0.669866f, -0.040816f, -0.402730f, 0.017990f, 0.215523f, -0.216816f, - 0.454826f, -0.726067f, -0.018750f, -0.928679f, 0.154315f, -0.465641f, - 0.144566f, -0.030064f, -0.054667f, -0.154055f, 0.625384f, 1.323795f, - -0.159496f, 0.097072f, -0.463197f, -0.057938f, 0.750290f, -0.233061f, - 0.412631f, -0.535223f, -0.151423f, -0.154583f, 0.024721f, -0.494448f, - 
0.230594f, -0.980138f, -0.653968f, 0.126079f, 0.051814f, -0.053219f, - -0.421708f, -0.228853f, 0.237885f, 0.888157f, 0.059655f, 0.241295f, - 0.210443f, 0.228238f, 0.119127f, -0.051989f, -0.355408f, 0.182215f, - 0.244277f, -0.104577f, -0.558035f, -0.023270f, 0.054571f, 0.700646f, - -0.223006f, 0.115523f, 0.023391f, 0.437264f, 0.709477f, -0.531212f, - -0.094731f, 0.328161f, -0.105418f, -0.133511f, 0.497168f, -0.030948f, - -0.407132f, -0.043943f, 0.155505f, 0.251945f, 0.205010f, 0.167160f, - 0.083654f, -0.636810f, 0.401315f, -0.398414f, 0.290046f, 0.206846f, - 0.042218f, 0.168150f, 0.843181f, -0.671242f, -0.202392f, -0.073301f, - 0.142895f, 0.237466f, 0.212145f, -0.091828f, 0.187038f, -0.720841f, - -0.616069f, -0.238021f, 0.065365f, 0.434119f, 0.179023f, -0.040107f, - -0.430734f, -0.297368f, 0.575954f, 0.382619f, -0.709787f, -0.320810f, - 0.242342f, -0.047614f, 0.705216f, 0.098077f, 0.357179f, 0.046017f, - 0.115074f, -0.412305f, -0.272304f, 0.048096f, -0.803811f, 0.275000f, - 0.642198f, 0.180286f, -0.087178f, -0.112707f, -0.394443f, 0.201989f, - 0.241759f, -1.038870f, 0.728124f, 0.800559f, -1.296268f, 0.198612f, - -0.053478f, 0.414344f, -0.510529f, 0.124179f, -2.219115f, -0.074583f, - -0.143055f, 0.001697f, 0.810811f, -0.657140f, 0.186818f, -0.936414f, - 0.539578f, -0.308244f, -0.126624f, -0.204767f, 0.091145f, -0.049340f, - 0.252014f, 0.394582f, 0.018764f, -0.060377f, -0.019133f, 0.064083f, - 0.069211f, -0.526693f, 0.209850f, -0.481466f, -0.468302f, -0.100407f, - 0.241018f, -1.037781f, 0.038539f, -2.113840f, -0.974895f, 0.163187f, - 0.425132f, -0.772546f, -1.261254f, -0.217488f, -0.971748f, -0.805640f, - -0.745175f, -0.177077f, 0.217658f, 0.381431f, -0.052338f, 0.087176f, - -0.165972f, 0.085937f, 0.472564f, -0.796627f, -2.453307f, 0.569664f, - -0.233010f, -0.192134f, 0.064339f, -0.111411f, -0.262469f, -0.410022f, - 0.519993f, -0.684620f, 0.393460f, -0.277753f, -0.153624f, 0.528984f, - -0.415558f, -0.445863f, 0.588512f, -0.142439f, -0.132127f, 0.199776f, - -0.579284f, 
0.119488f, -0.033590f, -0.503846f, -0.674979f, 0.335125f, - 0.020519f, 0.233973f, -0.297998f, -0.051511f, 0.518626f, -0.412782f, - -0.074045f, 0.130523f, 0.465751f, -0.117795f, 2.535813f, 0.352108f, - -0.499228f, 0.379784f, 0.056699f, 0.173142f, -0.076519f, -0.026666f, - 0.017834f, 0.492333f, 0.093364f, 0.037867f, -0.165420f, -0.356429f, - -0.562334f, 0.057656f, -0.307544f, 0.085857f, -0.559851f, 0.107230f, - -0.398633f, 0.152618f, -0.216835f, -0.024539f, 0.026044f, -0.249519f, - -0.563594f, -0.746025f, 0.025265f, -0.298888f, -0.185243f, 0.058794f, - 0.233696f, -0.115223f, 0.144617f, -0.864390f, 0.619944f, -0.023980f, - 0.019481f, 0.225252f, 0.416552f, -0.115993f, 0.935387f, 0.744386f, - 0.053353f, -0.052582f, -0.065650f, 0.228488f, -0.032042f, -0.371252f, - -0.003638f, -0.736984f, -0.203776f, 0.030922f, -0.065577f, -0.031643f, - -0.049253f, -0.054640f, 0.787134f, 0.545414f, -0.140297f, -0.124274f, - -0.110011f, -0.029552f, 0.657005f, 0.214973f, -0.374300f, 0.251642f, - 0.276591f, 0.030566f, -0.145470f, 0.350579f, -0.356436f, -0.052694f, - -0.063966f, -0.751008f, -1.042392f, 0.328892f, -0.425058f, -0.421571f, - -0.571889f, -1.141472f, -0.125216f, 0.212713f, -0.485170f, -0.088791f, - 0.124589f, 0.023237f, 0.077635f, 0.020901f, -0.271402f, -0.321424f, - -0.513946f, -0.867872f, -0.284593f, 0.106276f, 0.220192f, -0.143532f, - -0.014648f, 0.073402f, 0.327256f, -0.139803f, 0.168763f, 0.048199f, - -0.122526f, 0.111713f, -0.134257f, 0.810364f, -0.085222f, -0.259221f, - -0.239349f, 0.044448f, 0.205031f, 0.413113f, -0.107720f, -0.018816f, - -0.247741f, -0.004963f, 0.041170f, -0.158019f, 0.134839f, 0.129502f, - 0.800488f, -1.041584f, -0.129336f, 0.170834f, 0.566586f, -0.230443f, - 0.437937f, -0.149922f, -0.046665f, -0.094646f, 0.200070f, 0.072943f, - -0.076943f, -0.084971f, -0.515843f, -0.146720f, 0.472869f, -0.444731f, - -0.100877f, 0.545196f, -1.786626f, -0.482946f, 0.500509f, -0.843257f, - 0.200374f, 0.045103f, -0.575718f, -0.164335f, -0.232522f, -0.021825f, - -0.139490f, 
0.356058f, -0.352075f, 0.061751f, -0.200616f, -1.180921f, - -0.181355f, -0.137459f, 0.247574f, 0.181541f, 0.184314f, -0.961482f, - 0.493615f, 0.910261f, -2.279238f, 0.648631f, -0.055526f, -0.037137f, - 0.038643f, 0.136609f, -0.819373f, -0.040840f, -0.265989f, 0.006877f, - 0.454651f, -0.595323f, -0.099500f, -0.263717f, 0.150456f, 0.245077f, - -0.268666f, 0.162232f, -0.516451f, -0.024501f, 0.188046f, -0.002262f, - 0.261319f, 0.004173f, 0.746982f, 0.174761f, 0.470447f, -0.159558f, - -0.385240f, 0.023084f, -0.133520f, -0.220607f, -0.018731f, -0.373558f, - -0.707763f, -1.850150f, -0.807404f, -0.168063f, -0.071435f, -0.160740f, - -0.478789f, -1.070674f, -0.489740f, -0.255796f, 0.100486f, -0.153361f, - 0.334394f, -0.569472f, -0.198118f, 0.255922f, 0.104717f, -0.065179f, - 0.111879f, -0.447237f, 1.373623f, -0.190191f, -0.063311f, 0.337529f, - -0.138800f, 0.057009f, -0.137006f, 0.641378f, 0.883147f, -0.679655f, - 0.267717f, -0.351602f, -0.135225f, 0.229398f, -0.513225f, -1.120345f, - 0.528786f, -0.051081f, 0.086653f, 0.140141f, -0.563969f, 0.333402f, - -0.174745f, 0.321093f, -0.438641f, -0.005131f, 0.247415f, 0.110120f, - -0.076308f, -0.083244f, 0.838944f, -0.113043f, -0.013258f, -0.175028f, - -0.179941f, 0.272676f, -0.047946f, -0.088076f, -0.450031f, 0.053929f, - -0.083549f, -0.089952f, -0.186253f, 0.257483f, 0.011019f, 0.586435f, - 0.060580f, -0.052078f, 0.090277f, -0.780869f, 0.969811f, -0.025349f, - -0.281917f, 0.014857f, 0.231863f, -0.228601f, -0.003861f, 0.226550f, - 0.141825f, -0.102171f, -0.010387f, 0.220378f, -2.561975f, -0.497071f, - -0.315117f, 0.371981f, 0.138247f, 0.625031f, -0.308133f, -0.217876f, - 0.005615f, -0.860179f, 0.747491f, 0.006356f, -0.057024f, -0.483189f, - 0.055592f, -0.316834f, 0.069858f, 0.218788f, -0.200044f, 0.227588f, - 0.215496f, -0.055324f, -0.393147f, -0.394062f, -0.253264f, -0.075619f, - -0.152512f, -0.332995f, 0.129053f, 0.178668f, -0.302694f, 0.030678f, - 0.925896f, 0.964375f, 0.169021f, -0.218657f, -0.627204f, 0.206437f, - -0.521336f, 
0.176206f, 0.142733f, 0.139248f, 0.411682f, 0.181544f, - 0.224850f, -0.935547f, -0.558208f, 0.348096f, 0.342129f, -0.389340f, - -0.236308f, -0.132099f, 0.073642f, 0.089391f, -0.306901f, -0.397842f, - 0.444282f, 0.074623f, -0.051075f, -0.106617f, -0.184037f, -0.239046f, - -0.138761f, 0.120794f, -0.647577f, -0.336471f, 0.527899f, -0.164234f, - -0.028354f, 1.083678f, -0.251534f, -0.145903f, -0.182783f, 0.070976f, - -0.199590f, -0.400306f, -0.029763f, -0.548042f, -0.266270f, -0.118084f, - -1.152632f, 0.383685f, -0.105895f, -0.096829f, 0.118382f, 0.047447f, - -0.019051f, 0.310180f, -0.162793f, -0.029574f, 0.058054f, -0.636017f, - 0.490639f, 0.158347f, -0.385701f, -0.147057f, 1.285825f, -1.276083f, - -0.021795f, -0.101600f, 0.163254f, 0.267160f, -2.317864f, -0.098598f, - -0.296337f, -0.309017f, 0.164127f, -0.270012f, -0.071187f, -0.262270f, - 0.075415f, -0.368328f, 0.186728f, -0.158031f, 0.481663f, 0.515950f, - -0.162551f, 0.497981f, 0.262196f, 0.168479f, 0.726066f, -0.243856f, - -0.058998f, 0.140168f, 0.053242f, -0.624623f, -0.249480f, 0.055197f, - -1.376804f, 0.417571f, 0.203784f, 0.174370f, -0.155531f, -0.029400f, - -0.491473f, 0.079811f, -0.080123f, 1.345900f, 0.637077f, 0.434862f, - -1.787438f, 0.005756f, -0.362706f, 0.179458f, -0.288263f, 0.516788f, - -0.921248f, 0.043794f, -0.137729f, -0.196171f, -0.046295f, -0.793781f, - -0.156532f, -0.132566f, 0.517989f, -0.154321f, -0.054174f, -0.077900f, - -0.373316f, -0.117718f, 0.188986f, -0.476188f, -0.245312f, 0.181439f, - -0.161024f, -0.229059f, -3.079907f, -0.225452f, -0.594355f, -0.558027f, - -0.135429f, 0.125766f, -0.081314f, -0.350894f, -0.163165f, -1.936507f, - -0.205966f, 0.031472f, 0.744446f, -0.006680f, -0.837551f, 0.605862f, - -0.854929f, -1.543750f, -0.307704f, -0.240517f, 0.178240f, -0.183586f, - -0.010307f, 0.099373f, -0.228278f, 0.175236f, -0.000133f, 0.104491f, - -1.540545f, -0.570971f, -0.252885f, 0.483036f, 0.052531f, 0.260214f, - -0.515016f, -0.602081f, -0.485690f, -0.730710f, 0.163719f, -1.775975f, - 
-0.298634f, 0.323626f, -0.373579f, -0.872977f, 0.619574f, 0.026862f, - -0.122531f, -0.084698f, -2.436297f, 0.483996f, -0.203640f, -0.302157f, - -0.150666f, -0.238320f, 0.089250f, 0.236485f, -0.668654f, -0.122863f, - 0.491152f, -0.226444f, -0.181248f, 0.120158f, 0.294027f, 0.250056f, - 0.307601f, 0.357875f, -1.746455f, -0.175670f, 0.385447f, -0.108808f, - -0.090235f, -0.642504f, -0.486004f, -0.055160f, -0.068692f, 0.009736f, - 0.607555f, -0.489426f, 0.150624f, 0.598114f, -0.128816f, -0.445793f, - -0.066524f, -0.254380f, 0.227106f, -0.406495f, -0.121632f, -0.275960f, - -0.136494f, 0.339457f, -1.318132f, -0.417572f, -2.614077f, 0.324603f, - -0.001211f, 0.375192f, -0.473448f, -0.162510f, 0.099329f, -0.277965f, - 0.101221f, -0.060263f, 0.121867f, -1.042140f, 0.440851f, 0.078898f, - -0.209007f, -0.243699f, 0.715197f, -0.093997f, 0.086022f, -0.178203f, - -2.275496f, -0.098413f, 0.199352f, -0.526791f, -0.162086f, -0.197806f, - -0.231657f, -0.269202f, -0.794294f, -0.223461f, 0.503584f, 0.416236f, - 0.064082f, 0.197655f, 0.340871f, -0.186645f, -0.291498f, 0.433938f, - -1.110063f, 0.003751f, 0.392738f, 0.069360f, 0.102088f, -0.302128f, - -1.518457f, 0.106939f, 0.404527f, -0.306868f, -0.286928f, 0.729276f, - -0.531710f, 0.745048f, -0.168837f, -1.953886f, -0.258828f, -0.190252f, - 0.241877f, -0.916744f, -0.030326f, -0.070541f, -0.271037f, 0.211303f, - -0.489957f, 0.100850f, 0.323999f, -0.802837f, -0.462408f, -0.079350f, - -0.029374f, 0.131213f, -0.825032f, 0.040202f, 0.351821f, 0.002869f, - -0.132516f, -0.471264f, -0.297002f, 0.263913f, 0.033478f, 0.146161f, - 0.533229f, -0.228608f, -0.200639f, -0.170955f, -0.915037f, 0.724491f, - 0.005151f, 0.018584f, -0.029771f, -0.396038f, -0.159236f, 0.038691f, - -1.197056f, 0.146302f, 0.226840f, -0.852126f, 0.031214f, 0.108880f, - 0.562000f, -0.134633f, -0.713343f, -0.342252f, -1.764521f, -0.114653f, - 0.515073f, -0.080515f, -0.121155f, -0.865139f, -0.833694f, -0.368553f, - 0.347673f, 0.623379f, 0.722067f, -0.492458f, -0.513263f, 
0.585167f, - 0.721518f, -0.693499f, 0.343725f, -0.273861f, -0.040230f, -0.785664f, - -0.157500f, -0.308445f, 0.054062f, 0.600131f, -0.860887f, 0.434470f, - -0.191382f, -0.306150f, -0.243965f, 0.705444f, 0.007789f, -0.146154f, - -0.054499f, -0.073500f, -1.067364f, 0.404936f, -2.864590f, 0.182323f, - 0.326126f, 0.102405f, -0.135800f, 1.128095f, -0.012267f, -0.023996f, - -0.264834f, -0.108967f, -1.176746f, -0.926666f, 0.082999f, -0.498361f, - 0.083560f, -0.210074f, 0.019225f, -0.201614f, -0.904760f, 0.181421f, - 0.586384f, -0.177706f, 0.065471f, 0.168552f, 0.054705f, 0.045241f, - 0.048057f, -0.410957f, -2.188854f, -0.169812f, 0.015521f, 0.176856f, - -0.179331f, -0.352640f, -0.491735f, -1.743206f, 0.044227f, 0.010454f, - 0.823643f, -0.119781f, -0.098359f, 0.093119f, -}; - -static const float av1_ab_partition_nn_bias_128_layer1[LABEL_SIZE] = { - -0.433195f, -0.120488f, -0.116721f, 0.112134f, 0.118170f, -0.259769f, - -0.077530f, 0.394044f, 0.279167f, -0.317988f, 0.189538f, 0.314776f, - 0.325655f, -0.107123f, 0.591049f, 0.358744f, -}; - -static const NN_CONFIG av1_ab_partition_nnconfig_128 = { - FEATURE_SIZE, // num_inputs - LABEL_SIZE, // num_outputs - 1, // num_hidden_layers - { - 64, // num_hidden_nodes - }, - { - av1_ab_partition_nn_weights_128_layer0, - av1_ab_partition_nn_weights_128_layer1, - }, - { - av1_ab_partition_nn_bias_128_layer0, - av1_ab_partition_nn_bias_128_layer1, - }, -}; - -// nn model for ab partition pruning, 64x64. 
-static const float av1_ab_partition_nn_weights_64_layer0[FEATURE_SIZE * 64] = { - -0.495347f, -0.049498f, -0.026804f, 0.030474f, -0.289308f, -0.264193f, - -0.141121f, -0.072562f, -0.391665f, -0.051491f, -0.234761f, 0.027155f, - -0.038217f, 0.014872f, -0.289728f, -0.233577f, -0.415875f, -0.343615f, - -0.442543f, -0.482492f, 0.073510f, 0.007503f, 2.162329f, -0.362849f, - 2.145915f, -0.883135f, 0.185636f, -0.062859f, -0.465574f, -0.486205f, - -0.056710f, -0.330642f, -0.321860f, 0.042321f, -0.348965f, 0.003542f, - -0.291365f, -0.078164f, -0.345093f, -0.220272f, -0.471270f, -0.763853f, - 0.246622f, 0.199651f, -0.663420f, -0.154152f, -1.220383f, 0.047138f, - 0.816811f, 0.083247f, -0.218839f, 0.038143f, -0.063436f, 0.015517f, - -0.307320f, -0.166956f, -0.169499f, -0.399005f, -0.234638f, -0.162266f, - 0.050425f, -0.221723f, -0.256942f, -0.287285f, 0.144011f, -0.033245f, - 0.083649f, 0.119428f, -0.056706f, -0.117805f, 0.021866f, -0.257300f, - -0.201378f, -0.217484f, -0.413780f, -0.145793f, 0.082792f, -0.347247f, - 0.042539f, -0.302697f, 1.652316f, 0.000701f, -0.482843f, -0.160332f, - -0.450099f, 0.212399f, -4.715360f, -5.336774f, -5.375758f, -6.048339f, - 0.085956f, -0.037767f, 1.052409f, -0.931924f, -2.221907f, 0.268946f, - 0.015512f, 1.237094f, -1.092185f, 0.418247f, -0.082143f, -0.076914f, - -0.060749f, -0.325440f, -0.296960f, -0.066815f, -0.158477f, -0.373945f, - -0.122322f, -0.113495f, -0.097978f, -0.192816f, -0.270418f, 0.035840f, - -0.015458f, -0.121071f, -0.279582f, -0.067683f, 0.097855f, 0.019839f, - 0.451127f, 0.004376f, 1.410392f, 3.255835f, -0.344815f, 0.145202f, - 0.204132f, 0.171948f, -0.527736f, -0.110353f, 0.901448f, 0.003238f, - -3.822090f, 0.235462f, 1.024823f, -0.821244f, 0.876056f, 2.553762f, - -3.478597f, -2.076582f, -0.265515f, -0.055923f, -0.156980f, -0.164097f, - -0.246040f, 0.039430f, -0.071769f, -0.118847f, -0.304053f, -0.281541f, - -0.226021f, -0.263091f, -0.127359f, -0.249410f, -0.051023f, 0.083911f, - 0.084721f, 0.168089f, -0.272169f, 
-0.204998f, -0.008303f, -0.173998f, - 0.079376f, -0.197426f, -0.199052f, -0.118794f, -0.063753f, -0.094769f, - 0.066176f, -0.175832f, -0.238752f, -0.287960f, -0.134307f, -0.185953f, - -0.385845f, 0.119769f, -0.006567f, -0.382126f, -0.214221f, 0.038449f, - -0.253484f, -0.282766f, -0.020249f, -0.193929f, 0.016281f, -0.114423f, - -0.145940f, -0.281621f, -0.007588f, -0.131470f, -0.189012f, -0.185699f, - -0.279011f, -0.008132f, 0.208463f, 0.020569f, -0.206803f, -0.213408f, - -0.206131f, -0.290245f, 0.069701f, -0.000371f, -0.307572f, -0.451785f, - -0.300838f, -0.453186f, -0.301691f, 0.046327f, -0.312668f, 0.058272f, - -0.303131f, -0.376252f, 0.108384f, -0.086623f, -0.100630f, -0.027330f, - -0.003969f, 0.089502f, -0.200722f, -0.107889f, 0.061843f, -0.008478f, - -0.265057f, -0.271132f, -0.073562f, 0.129337f, -0.283698f, -0.353414f, - 0.076420f, -0.244280f, -0.119537f, -0.105366f, -0.184692f, -0.038817f, - -0.478507f, -0.118808f, -0.472979f, -0.305884f, -0.462813f, -0.189581f, - -0.011932f, -0.585700f, 0.253212f, -1.061900f, -0.205116f, -0.336407f, - -0.762199f, 0.577737f, 0.230832f, 0.434440f, -0.096713f, 0.038552f, - -0.147800f, -0.213553f, 0.041740f, -0.281907f, -0.026154f, -0.082356f, - -0.331871f, -0.408247f, -0.129022f, -0.037550f, -0.310233f, -0.320883f, - -0.391963f, -0.467392f, 0.027453f, -0.394761f, -0.045544f, 0.076052f, - 0.483985f, 0.067093f, 0.141361f, 0.576772f, 0.859718f, 2.566515f, - -0.025476f, 0.769738f, -0.680235f, -1.683309f, -2.394131f, -0.000714f, - -0.615021f, -0.195856f, -0.434035f, -0.295010f, -0.668659f, -0.245959f, - 0.551148f, 1.777227f, -0.461630f, 0.043093f, 0.012293f, -0.255841f, - -0.097070f, -0.371156f, -0.146323f, -0.015508f, -0.103873f, -0.087476f, - -0.297266f, -0.128699f, -0.149555f, 0.016534f, -0.375498f, -0.346759f, - -0.455156f, -0.147509f, -0.427076f, -0.354431f, -0.158025f, -0.164604f, - -0.237038f, -0.010314f, -0.092884f, -0.397084f, -0.217980f, -0.127184f, - -0.048421f, -0.144133f, 0.889073f, 0.012606f, 3.007608f, -0.602584f, - 
-1.849480f, -0.373159f, -1.890695f, -3.609938f, 0.811923f, -1.867208f, - -0.244326f, -0.018012f, -0.211192f, -0.220196f, 0.169363f, 0.119141f, - -0.230715f, 0.083247f, 0.020367f, -0.128629f, -0.217455f, -0.159640f, - 1.815952f, -0.369238f, -1.186447f, -0.658753f, -0.511026f, -0.096934f, - 0.662971f, 0.486475f, 0.159746f, -0.018932f, 3.692397f, 1.384353f, - -0.401984f, -0.248380f, -0.140861f, 0.215248f, -0.023711f, 0.059679f, - -0.072260f, 0.004271f, 0.039545f, -0.347971f, -0.081851f, -0.474896f, - -0.181572f, 0.066736f, -0.157822f, -0.163760f, -0.171113f, -0.089935f, - -0.338281f, -0.421444f, -0.306687f, -0.085283f, -0.377953f, -0.138750f, - -0.102701f, -0.312336f, 0.149831f, 0.007229f, -0.155700f, -0.173611f, - 4.074261f, 1.342306f, -1.272712f, 1.570899f, -0.545093f, -0.317605f, - -0.189440f, -0.133910f, -0.273190f, -0.108020f, -0.166107f, 0.021413f, - -0.239130f, -0.067211f, 0.041957f, -0.039234f, -1.003587f, -0.094412f, - 0.532512f, -0.870538f, -1.118023f, -1.160983f, -0.736307f, -0.418752f, - 0.419466f, 0.492122f, -0.004368f, -0.022096f, -1.115132f, 0.150886f, - 2.396852f, 2.660000f, -0.376537f, 0.468628f, 0.149413f, -0.074898f, - -0.067154f, 0.021245f, 0.127857f, 0.294189f, 0.508056f, 0.390232f, - -3.899177f, -3.414681f, -3.929195f, -4.160545f, -0.274323f, -0.052583f, - -0.003545f, -0.433084f, -0.404891f, -0.145051f, -0.312367f, 0.004579f, - -0.398724f, -0.372068f, -0.234279f, 0.017799f, -0.424760f, -0.646717f, - -0.047568f, 2.924664f, -0.644165f, 0.359349f, -0.294800f, 0.591746f, - -0.404710f, -0.092358f, -0.250729f, 0.030829f, -0.147149f, -0.476023f, - -0.071803f, -0.482516f, -0.293117f, -0.215923f, -0.373122f, -0.085315f, - -0.377052f, -0.449899f, -0.056452f, 0.138081f, -0.085350f, -0.308391f, - 0.106661f, 0.176234f, 0.258869f, -0.230172f, -0.233029f, -0.241208f, - -0.067509f, -0.223172f, -0.118353f, -0.302478f, -0.579632f, -0.561326f, - -0.158114f, -0.223167f, -0.026689f, 0.051863f, 0.212834f, -0.304714f, - -0.169071f, -0.193695f, -0.075682f, -0.170860f, 
-0.241008f, -0.044648f, - 0.280815f, -0.002585f, -0.283552f, -0.037701f, -0.681169f, -0.274535f, - -0.380595f, 0.109504f, -0.111141f, -0.437685f, -0.094459f, 0.144206f, - -0.106139f, -0.211832f, -0.054742f, -0.172813f, -0.295905f, -0.071907f, - -0.418429f, -0.183240f, 0.031319f, -0.095785f, -0.315447f, 0.069404f, - -0.422910f, -0.029867f, -0.357321f, -0.199976f, -0.337707f, -0.070188f, - -0.178198f, 0.177208f, 0.134688f, -0.081933f, -0.229452f, -0.208872f, - 0.026287f, -0.364040f, -0.063696f, -0.227443f, -0.234401f, -0.205699f, - -0.267238f, -0.494125f, -0.056255f, 0.053715f, -0.487754f, 0.014818f, - 0.087383f, -0.077556f, -0.168085f, -0.436851f, -0.276286f, -0.137845f, - -0.107606f, -0.103653f, -0.233766f, -0.419083f, 0.169185f, 0.010186f, - -0.001587f, 0.086735f, -2.465718f, 1.482185f, 1.621193f, -2.081680f, - 1.386553f, -3.204335f, -0.267111f, -0.004508f, 0.164712f, 0.274147f, - 1.724306f, -2.273659f, 0.749574f, -0.891905f, 0.105965f, -0.030428f, - -0.416018f, -0.300762f, 0.122911f, -0.316908f, -0.292504f, 0.138666f, - -0.161327f, -0.042143f, -0.249128f, 0.149210f, -0.088987f, -0.654101f, - -1.501843f, 0.216777f, 0.955914f, 0.524158f, -1.642561f, -1.643626f, - 0.864797f, -0.425451f, -2.115764f, -0.012502f, 0.065172f, 1.297270f, - 0.018845f, 1.167276f, -0.470970f, -0.244995f, 0.374782f, -1.811056f, - -0.055430f, -0.024102f, -0.376519f, -0.339640f, -0.119177f, -0.277995f, - -0.290095f, -0.081362f, -0.144139f, -0.118037f, -0.180357f, -0.217559f, - -0.370683f, 0.172816f, -0.265069f, 0.194321f, -0.273478f, 0.037442f, - -0.235552f, -0.078625f, -0.447541f, 0.016836f, -0.271123f, -0.171481f, - -0.321477f, -0.184826f, -0.442981f, -0.227273f, -0.370666f, -0.237232f, - -0.257493f, -0.225714f, -0.153716f, -0.283487f, -0.155399f, 0.067697f, - 0.230343f, -0.034318f, -0.022687f, -0.047090f, -}; - -static const float av1_ab_partition_nn_bias_64_layer0[64] = { - -0.212182f, -0.233725f, -0.758846f, -0.158162f, 0.614743f, -0.150944f, - -0.075727f, -0.208414f, 1.054996f, 0.713758f, 
-0.300051f, -0.151482f, - -2.443570f, 0.430590f, -0.129001f, -0.160733f, -0.230547f, -0.143228f, - -0.140577f, -0.086812f, -0.212298f, -0.159557f, -0.055647f, -0.211423f, - 0.578161f, -0.220318f, -0.210107f, -3.111584f, 0.604419f, -0.232622f, - -0.209924f, -0.130794f, -0.084097f, -0.036005f, 0.294594f, -2.535531f, - -0.209783f, -0.211189f, -2.766337f, 0.000000f, 0.450177f, -1.754884f, - 3.262664f, -0.209691f, -0.614886f, -0.211257f, -0.109096f, -0.190492f, - -0.109007f, -0.026910f, -0.136035f, -0.212321f, -0.139320f, -0.212233f, - -0.305430f, 0.739171f, 0.991277f, -0.088150f, 0.086313f, -0.023379f, - -0.125366f, -0.063576f, -0.212169f, -0.047463f, -}; - -static const float av1_ab_partition_nn_weights_64_layer1[64 * LABEL_SIZE] = { - -0.036800f, 0.528721f, 0.490767f, 0.144409f, 1.103640f, 0.361910f, - -0.180069f, 0.068033f, -14.868382f, 0.359013f, 0.322567f, -0.199212f, - 0.906164f, -0.488254f, 0.149653f, -0.216394f, -0.099347f, 0.004936f, - -0.111391f, 0.074848f, -0.041709f, 0.147627f, -0.018905f, 0.096116f, - 0.184817f, -0.016241f, 0.115739f, 2.376754f, 0.637097f, 0.052954f, - 0.136428f, 0.225267f, -0.181873f, -0.142876f, 0.684048f, 0.658791f, - 0.105795f, 0.241705f, 1.381114f, -0.209379f, 1.145949f, 0.795293f, - -9.361877f, 0.198302f, 0.539600f, 0.092317f, -0.081695f, 0.200777f, - 0.102334f, 0.081583f, 0.060948f, -0.025110f, 0.160951f, -0.020170f, - 0.234006f, -0.029369f, 0.375036f, 0.270209f, -0.556529f, 1.402949f, - 0.101777f, -0.027331f, 0.004502f, -0.153166f, -0.116651f, 0.151573f, - -0.022187f, 0.144044f, -0.108719f, -0.129942f, -0.270321f, 0.227363f, - 1.892330f, -0.661052f, -0.219398f, -0.229417f, -0.856438f, -1.196988f, - -0.081774f, 0.078847f, -0.207057f, -0.048947f, 0.152073f, -0.243056f, - -0.233329f, -0.288689f, -0.158333f, -0.141177f, -0.715436f, 0.016947f, - -0.093752f, 0.204984f, -1.209782f, 0.155683f, 0.092239f, 0.146495f, - 0.813146f, -0.027757f, 0.330982f, 2.173948f, -0.028867f, -0.141815f, - 0.292708f, -0.204794f, 0.014496f, 1.032799f, 
1.312155f, 0.107020f, - 0.824752f, -0.013945f, 0.184829f, -0.041633f, 0.215300f, -0.476088f, - -0.053213f, 0.126862f, -0.020777f, 0.082893f, -0.223727f, -0.923063f, - 0.466529f, 0.082140f, -0.845758f, -1.140791f, -0.262033f, 0.138491f, - 0.151717f, -0.182479f, -0.131128f, 0.055411f, 0.106771f, 0.125552f, - 0.297184f, -0.257403f, -0.059884f, -0.274903f, 2.694357f, -0.108244f, - 0.025377f, 0.043092f, -0.558317f, 3.517159f, -0.270833f, -0.240676f, - 0.205100f, -0.057068f, -0.140445f, -0.193449f, -0.030061f, -0.286762f, - -0.467523f, -0.012647f, 0.190564f, 0.022394f, -0.101479f, 0.339684f, - -0.902743f, -0.169578f, -0.178029f, -0.041836f, -3.952108f, -0.028298f, - -0.221137f, -0.733895f, -0.223895f, 0.039012f, 0.687867f, 0.021423f, - 0.113063f, 0.676087f, -0.961000f, -0.064847f, 0.712856f, -0.192765f, - -0.001132f, 0.016689f, -0.236020f, -0.766186f, -0.175729f, 0.012879f, - -0.251064f, -0.105523f, -0.039212f, -0.347584f, 0.304352f, -0.034174f, - -0.364258f, -0.685252f, -0.266115f, -0.247345f, -0.155905f, 0.152283f, - -0.156315f, 0.174082f, -0.757654f, 0.102303f, -2.192316f, -0.245815f, - 0.119882f, -0.086542f, 1.987246f, -1.353163f, -0.374813f, -0.233504f, - -1.980895f, 0.692093f, -0.168351f, 0.172700f, -0.009052f, -0.015734f, - 0.106679f, -0.060472f, -0.256813f, -0.074874f, -0.207488f, -0.329515f, - -0.418268f, -0.017940f, -0.036081f, 0.064719f, -1.488016f, 0.020591f, - -0.176325f, -0.141074f, 0.944494f, 0.150237f, -0.249805f, -0.277280f, - 0.012686f, 0.132483f, 0.116123f, 0.013737f, -0.116091f, 0.750340f, - 3.251343f, -0.188864f, 1.096992f, 0.058467f, -0.041433f, -0.037937f, - -0.133294f, -0.137908f, -0.171132f, 0.106362f, 0.069383f, -0.052662f, - -0.177883f, -0.408049f, 0.680221f, -0.117035f, -0.904240f, -1.395228f, - 0.154527f, 0.134427f, 0.022767f, -0.158886f, -0.230316f, 0.161096f, - 0.362213f, -0.235060f, -0.941620f, 0.055912f, -0.049458f, -0.166632f, - 0.481418f, 0.930146f, 0.041108f, 0.033674f, 1.372066f, -1.847709f, - 0.003324f, 0.259534f, 0.177014f, 
-0.202761f, -0.262017f, -0.190852f, - -0.102839f, 0.028338f, 0.187193f, -0.041684f, 0.123973f, -0.198576f, - -0.110369f, -1.431400f, 0.208369f, -0.302370f, -0.248549f, 0.062985f, - 0.673409f, 0.036662f, -0.711340f, -0.120584f, -0.189789f, 0.098812f, - 2.947819f, 0.216567f, -0.414472f, -0.181742f, 1.873779f, -0.222726f, - -0.782870f, 0.007889f, 0.015062f, -0.554328f, 0.182928f, -0.191430f, - 0.123636f, -0.215460f, -0.225245f, 0.251516f, -0.013025f, -1.359595f, - -0.750602f, 0.342667f, -0.141899f, -0.687493f, -0.072639f, 0.048018f, - -0.242107f, -0.031917f, -0.287472f, -0.046088f, 0.832197f, -0.016576f, - -1.553349f, -0.216341f, 0.023077f, -0.410867f, 4.243743f, -0.514878f, - -0.066007f, -0.160696f, -0.262678f, -0.648790f, -0.430586f, 0.199940f, - -0.202496f, -0.222241f, -0.016406f, -0.121473f, 0.000828f, -0.081584f, - -0.152641f, -0.190166f, 0.644400f, 0.040196f, -0.302104f, -1.143654f, - -0.160327f, -0.320780f, -0.187006f, 0.037311f, 0.440618f, -0.070733f, - -0.117785f, 1.527539f, -0.419310f, 0.001300f, 1.389956f, -0.036366f, - -0.269203f, 0.612265f, 2.721897f, -0.086836f, -0.446999f, 0.012525f, - -0.078317f, -0.287052f, -0.111188f, -0.085181f, -0.164667f, -0.010466f, - -0.569722f, -0.018888f, -0.101663f, -1.147130f, -0.465204f, 0.114524f, - -2.192402f, -0.221325f, 0.375748f, 0.206284f, -0.261548f, -0.246257f, - -0.143004f, -0.069981f, -0.057306f, -0.116481f, -0.435903f, -0.314970f, - 0.013210f, -0.010175f, 4.630571f, -0.473226f, -0.197199f, -0.028204f, - 0.122907f, 2.475548f, 0.025011f, -0.092603f, -0.127561f, -0.151330f, - -0.077295f, 0.245016f, -0.045005f, 0.183396f, -0.330556f, -0.384887f, - 0.356374f, -0.016618f, -0.463353f, -1.291546f, -0.071986f, -0.311599f, - 0.072385f, -0.430786f, -2.094788f, 0.202733f, -0.910109f, -1.336543f, - -0.086800f, -0.096413f, 1.544383f, 0.031860f, -0.796211f, 0.762786f, - 3.250022f, -0.441798f, -0.698537f, 0.062839f, 0.033525f, -0.362996f, - 0.027022f, -1.131264f, -0.228926f, 0.053885f, -0.338628f, 0.155037f, - -0.046844f, 
-0.888172f, -0.241767f, 0.084965f, -0.617743f, -0.049896f, - -0.036894f, -0.304783f, -0.002639f, 0.137957f, 0.052121f, -0.131161f, - -0.117200f, -0.253380f, -0.205561f, -0.302450f, -0.047397f, -0.330518f, - 3.613420f, -1.525951f, -0.026738f, 0.209150f, -2.103534f, 2.019689f, - -0.366199f, -0.095260f, 0.027417f, -0.242512f, 0.162579f, 0.052113f, - -0.293851f, -0.068138f, -0.005799f, -0.344696f, -0.114824f, -0.431107f, - -0.120058f, -1.139926f, -1.048379f, 0.036446f, -0.323020f, -0.432945f, - 0.454151f, -0.140058f, 0.050649f, -0.094900f, -0.017278f, -0.238719f, - 1.193153f, 0.120447f, -0.496061f, 0.917431f, 2.936126f, -0.115521f, - -0.347397f, -0.435325f, -0.004383f, -0.211864f, 0.162383f, -1.040726f, - 0.089537f, -0.128579f, -0.133505f, 0.107129f, -0.435657f, -0.180388f, - 0.043650f, 0.018709f, -0.773242f, -0.687192f, -0.120633f, -0.063626f, - 0.029912f, 0.113972f, -0.403502f, -0.127640f, -0.269625f, 0.129794f, - -0.188539f, 0.041641f, 0.029769f, -0.198374f, 1.401407f, 0.353887f, - -0.219925f, 0.260515f, 1.157034f, -2.992044f, -0.097618f, -0.064417f, - -0.203626f, -0.008217f, -0.112339f, -0.227407f, -0.155118f, 0.247705f, - -0.012304f, -0.248447f, -0.913463f, -0.064788f, -0.214619f, -0.251761f, - -0.386861f, -0.040574f, -0.163219f, -0.100700f, 1.488274f, -0.071684f, - -0.033626f, -0.006497f, -0.246945f, -0.145221f, -3.747390f, 0.149609f, - -0.263326f, -0.297385f, -1.039896f, -0.083174f, -0.025473f, -0.235586f, - -0.001087f, 0.254286f, 0.265106f, 0.007325f, 0.199239f, 0.134103f, - -0.578211f, -0.259801f, -0.062373f, 2.368348f, 0.560556f, -0.252260f, - 0.889997f, -0.447872f, -0.059218f, -0.095315f, -0.061667f, 0.183580f, - -0.157479f, 0.055387f, -0.831734f, 0.007606f, -1.104906f, 0.301180f, - -0.117115f, 0.212959f, 4.727223f, -0.243833f, -0.397495f, -0.025021f, - -0.367587f, -2.082058f, -0.217699f, 0.148111f, 0.252430f, 0.111088f, - -0.260692f, 0.095124f, -0.407774f, -0.322169f, 0.002927f, 0.126169f, - -1.272325f, -0.279772f, -0.373680f, -0.485177f, -0.605458f, 
0.021225f, - -0.092031f, -0.226585f, 1.895162f, 0.037866f, -0.275475f, 1.614360f, - -0.014972f, -0.277679f, -3.449082f, -0.092060f, -0.747873f, 0.020716f, - 2.776178f, -0.049963f, 0.183999f, -0.295259f, -0.028868f, 0.221895f, - 0.001265f, 0.336823f, 0.219372f, 0.112824f, 0.408132f, -0.017940f, - -0.311666f, 1.489606f, -0.058093f, -0.305659f, -0.491933f, -0.143847f, - 0.166115f, 0.042867f, -0.123447f, -0.087099f, -0.305395f, -0.365079f, - -0.755801f, -0.160649f, 0.736260f, -0.008611f, 0.095836f, -0.017345f, - 5.697515f, -0.498971f, -0.125280f, 0.199907f, 0.300053f, 0.605026f, - -0.228225f, -0.259523f, 0.016384f, 0.146973f, 0.210258f, 0.226766f, - -0.075178f, -0.050924f, 0.188496f, -0.415266f, -0.484880f, -0.236384f, - 0.071931f, -0.331863f, -0.601243f, -0.232479f, -0.285272f, 0.123789f, - -1.341333f, 0.037082f, -0.315202f, -1.587215f, -0.271576f, 0.003216f, - -4.437186f, -0.256205f, -0.576589f, -0.114147f, 2.153916f, -0.369618f, - 0.271415f, 0.145036f, -0.158731f, -0.240938f, -0.187369f, 0.036325f, - 0.254771f, 0.211488f, -0.240297f, 0.098417f, -0.415011f, 2.334793f, - -0.127252f, 0.020069f, -0.168755f, -0.448922f, -0.219207f, 0.016232f, - -0.221935f, -0.269500f, -0.100636f, 0.102545f, -0.809376f, -0.054979f, - 0.360713f, -0.326541f, 0.112933f, 0.138073f, 4.229404f, -0.763801f, - -0.305429f, 0.199955f, -1.787713f, 0.272866f, 0.109895f, 0.138466f, - -0.250259f, -0.167162f, -0.212588f, -0.217589f, -0.067125f, -0.077490f, - -0.208970f, -0.006863f, -0.671146f, -0.298320f, -0.165509f, 0.044597f, - -1.408624f, -0.213957f, -0.220947f, 0.129718f, 1.316777f, -0.098928f, - -0.008121f, -0.558293f, -0.297290f, -0.218873f, -4.346638f, -0.228174f, - -0.204710f, -0.388864f, 2.697919f, 0.025260f, 0.857020f, 0.009921f, - 0.036915f, -0.320275f, -0.087937f, 0.022636f, 0.236667f, 0.135496f, - -0.059616f, -0.192955f, 0.009470f, 2.139589f, -0.200449f, 0.129818f, - 1.017444f, -0.608299f, 0.257914f, -0.134306f, -0.033327f, 0.002855f, - -0.338598f, 0.015559f, 0.117362f, -0.166760f, 
0.086903f, -0.167666f, - 0.193523f, 0.033852f, -1.147686f, 0.489468f, -0.006969f, 0.125630f, - 1.557907f, -1.604449f, -0.071114f, 0.096178f, 0.007065f, 0.200013f, - 0.213393f, 0.168466f, -0.100568f, -0.117861f, -0.161542f, -0.072561f, - -1.069871f, -0.470138f, -0.352578f, -1.503513f, -0.001394f, -0.380109f, - 0.065089f, -0.281668f, 0.988953f, -0.002778f, -0.659026f, -0.470692f, - -0.407292f, 0.011710f, -1.362085f, 0.184738f, -0.135786f, -1.374241f, - 4.487930f, -0.067274f, -0.956404f, -0.233995f, 0.224527f, -0.454556f, - 0.037900f, -0.281658f, 0.208224f, -0.254753f, 0.045740f, 0.051444f, - -0.388281f, 0.257112f, -0.485030f, -0.082659f, 0.148103f, -1.007456f, - -0.022295f, 0.036984f, -0.369401f, -0.076943f, -0.007636f, -0.293022f, - 0.470466f, 0.199012f, -2.158182f, 0.036577f, -0.014725f, -0.229516f, - 2.236929f, 0.030945f, -0.400045f, 0.109348f, 0.214691f, -0.891516f, - -0.251379f, -0.217358f, 0.013733f, 0.205573f, -0.151725f, -0.191782f, - -0.339630f, -0.163905f, -0.119191f, -0.032516f, 0.503015f, 0.025772f, - 0.029094f, -1.146153f, 0.216723f, -0.330023f, 0.064695f, -0.262521f, - 0.425612f, -0.093080f, -0.489648f, 1.051293f, -0.092332f, 0.095557f, - -0.874132f, 0.218483f, -0.127648f, -1.605802f, 2.763617f, -0.186734f, - -1.243166f, -0.193514f, -0.173748f, 0.337822f, 0.183873f, -0.251594f, - -0.211582f, 0.144081f, 0.029620f, -0.024853f, -0.385140f, 0.467341f, - -0.928316f, -0.195442f, 0.917783f, 0.357084f, 0.174445f, -0.073659f, - -0.012811f, -0.115420f, -0.181147f, -0.364449f, -0.567395f, -0.012969f, - -1.680714f, 0.065323f, 0.198063f, -0.244201f, 1.428545f, -0.432539f, - -0.208931f, -0.091205f, 0.957125f, 0.813519f, -0.262677f, 0.246852f, - 0.015536f, 0.055026f, 0.067054f, 0.262103f, -0.358115f, -0.095206f, - -0.267522f, -0.402710f, -0.680397f, -0.123627f, -0.385590f, -1.504680f, - -0.169513f, -0.215338f, 0.043633f, -0.079052f, -0.464410f, 0.122894f, - -0.278231f, -2.456445f, -0.159917f, -0.015597f, -0.735449f, -0.078854f, - -0.400290f, -1.153870f, 3.657228f, 
-0.287093f, -1.174355f, -0.102001f, - -0.288281f, 0.185209f, -0.145228f, -0.200449f, -0.099914f, -0.138354f, - 0.254428f, -0.161751f, -0.118206f, 0.296043f, -0.482613f, 0.080932f, - 1.097605f, -0.010190f, 0.232439f, 0.447617f, -0.133508f, 0.115763f, - -0.388589f, 0.174695f, -0.236014f, 0.006284f, -1.374129f, 0.092015f, - -0.241419f, -0.231667f, 2.763950f, -0.922932f, -0.061605f, 0.208740f, - -1.597190f, 1.353325f, -0.198528f, 0.250498f, -0.013950f, -0.203861f, - -0.254563f, 0.081931f, -0.413369f, 0.011844f, 0.080961f, -0.231161f, - -1.234909f, -0.440843f, -0.174980f, -0.315283f, -0.337474f, -0.123243f, - -0.310001f, -0.271028f, 0.364179f, 0.022845f, -0.535517f, -0.772936f, - -0.188435f, 0.039667f, -0.807463f, 0.266550f, -0.288857f, -1.630789f, - 1.280155f, 0.065712f, -0.279960f, -0.300056f, 0.258440f, -0.073781f, - 0.213878f, 0.042196f, 0.021360f, 0.211698f, -0.003751f, -0.192673f, - -0.137008f, 0.247878f, -0.470604f, 0.073164f, 1.523241f, 0.734755f, - -0.114126f, -0.193834f, -0.025759f, 0.263183f, -}; - -static const float av1_ab_partition_nn_bias_64_layer1[LABEL_SIZE] = { - -0.343508f, -0.706936f, -0.160676f, -0.877101f, -0.517567f, -0.253254f, - -0.148074f, 0.923430f, -0.364770f, 0.203550f, 0.401216f, 0.938246f, - -0.872737f, 0.718723f, 0.703398f, 2.560015f, -}; - -static const NN_CONFIG av1_ab_partition_nnconfig_64 = { - FEATURE_SIZE, // num_inputs - LABEL_SIZE, // num_outputs - 1, // num_hidden_layers - { - 64, // num_hidden_nodes - }, - { - av1_ab_partition_nn_weights_64_layer0, - av1_ab_partition_nn_weights_64_layer1, - }, - { - av1_ab_partition_nn_bias_64_layer0, - av1_ab_partition_nn_bias_64_layer1, - }, -}; - -// nn model for ab partition pruning, 32x32. 
-static const float av1_ab_partition_nn_weights_32_layer0[FEATURE_SIZE * 64] = { - -0.323723f, -0.214013f, -0.007772f, -0.458851f, -0.125542f, -0.123860f, - -0.410973f, -0.209389f, -0.087580f, -0.272881f, -0.168500f, -1.130845f, - 0.344916f, -0.475017f, -0.362262f, -0.195662f, -0.566124f, 0.782163f, - 0.411575f, -0.013378f, -0.318650f, -0.124678f, -0.612909f, -0.315788f, - -0.263990f, -0.508783f, -0.048938f, -0.416407f, -0.402648f, -0.156644f, - 0.225887f, -0.000493f, 2.682241f, 0.871204f, 0.059014f, 0.803542f, - -1.407028f, -1.154669f, 1.388148f, -0.293348f, -0.003669f, -0.009607f, - 1.330030f, -0.337841f, 2.118617f, 1.033059f, -0.084788f, 0.212904f, - 0.082405f, -0.070579f, -0.494005f, -0.173392f, 0.039546f, -0.463865f, - 0.077163f, -0.434066f, 0.030835f, -0.427139f, -0.560520f, -0.031606f, - -0.368541f, -0.027458f, 0.370574f, 0.461418f, 1.087682f, -0.572137f, - -1.509596f, -0.765697f, -0.499383f, -0.277998f, -0.106492f, -0.129564f, - -0.169133f, -0.269834f, -0.114270f, -0.275431f, 0.016339f, -0.156744f, - -0.267922f, 0.171216f, 0.110556f, 0.002954f, -0.200327f, -0.187663f, - 3.691601f, 1.234152f, 0.186315f, -0.125370f, -0.211235f, -0.554432f, - -0.131072f, -0.124982f, -0.130339f, -0.235350f, 0.018903f, 0.012896f, - -0.159372f, -0.269571f, -0.025709f, -0.221251f, 0.061919f, 0.016307f, - 0.384673f, -0.134525f, -1.599126f, -0.416459f, -0.743052f, 0.670249f, - -0.169709f, 0.421681f, -0.033360f, -0.072817f, 0.003647f, -0.110632f, - -0.158651f, -0.095136f, 0.223759f, 0.165767f, -0.269129f, -0.196075f, - -0.023183f, -0.293420f, 0.014875f, 0.018688f, -0.153407f, -0.172009f, - -0.259947f, -0.124015f, 0.173653f, -0.089103f, -0.021001f, -0.334230f, - 0.027177f, 0.103371f, -0.183860f, -0.204051f, -0.023721f, -0.192297f, - -0.143771f, -0.247106f, 0.218116f, -0.013240f, 2.831783f, 1.483928f, - -0.877025f, -0.313462f, -0.411320f, -0.447825f, 0.605977f, 0.234684f, - -0.119150f, -0.075182f, -0.330463f, 0.071503f, -0.254924f, -0.360071f, - -0.037022f, 0.063261f, -0.148759f, 
-0.238254f, -0.462018f, -0.027166f, - 0.065318f, -0.235743f, -0.257194f, -0.094784f, 0.022423f, 0.055925f, - 0.086672f, -0.021010f, 0.009965f, -0.001648f, -0.104917f, -0.387443f, - -0.102673f, -0.281706f, 0.145923f, -0.233391f, -0.378365f, -0.145584f, - -0.077751f, -0.121166f, 1.134565f, -0.097500f, -0.749202f, -0.544566f, - -1.361374f, -0.102494f, 1.089275f, 0.375299f, -0.105091f, 0.037641f, - -0.054248f, -0.282691f, -0.377797f, -0.066427f, -0.253815f, -0.329677f, - -0.339326f, -0.128217f, -0.282905f, 0.014937f, 1.067185f, -0.171764f, - 0.484458f, 0.396706f, -0.557055f, -0.891596f, -0.257839f, -0.720879f, - -0.218449f, -0.004755f, 1.572857f, 0.006229f, 1.962895f, -0.029746f, - -4.137691f, -2.185991f, -2.763477f, -0.520437f, -0.208708f, 0.006444f, - -1.263078f, -0.304560f, 1.072374f, 2.556429f, 0.312850f, 0.257488f, - -0.634264f, 0.156769f, -0.188943f, 0.040295f, -0.389915f, 0.085250f, - -0.248525f, 0.045667f, -0.776115f, -0.274680f, -0.448145f, -0.566161f, - -1.285316f, 0.079060f, 0.389124f, -0.510401f, -0.015299f, -0.664661f, - 0.099901f, -0.470694f, -0.051593f, -1.076381f, -0.442104f, -0.197867f, - -0.330011f, -0.448523f, -0.301018f, -0.442093f, -0.491953f, -0.582091f, - -0.064569f, -0.156516f, 0.543522f, -0.005924f, 0.161432f, 0.974793f, - 0.273712f, 1.104850f, -0.290312f, 0.313417f, -0.125370f, 0.136234f, - -0.191227f, -0.165054f, 0.011872f, -0.298871f, 0.095740f, 0.142760f, - -0.215771f, -0.031437f, 0.101041f, -0.085620f, 0.435387f, 0.002786f, - 1.971375f, 0.018392f, -1.771940f, -0.401433f, 0.808263f, -3.350013f, - 2.296952f, -1.024403f, -0.041645f, -0.034799f, -0.024078f, -0.347301f, - -0.276088f, -0.455907f, 0.266021f, 0.087348f, -0.146566f, 0.040492f, - -0.539866f, -0.206851f, -0.387874f, -0.125508f, -0.496676f, -0.373845f, - -0.472356f, -0.357082f, -0.081254f, -0.456466f, 0.554713f, 0.002185f, - -4.225019f, 0.344025f, 0.728796f, -0.262936f, 1.383924f, 1.577300f, - -2.653320f, -2.516156f, -0.301604f, -0.204105f, -0.138252f, -0.587536f, - -0.097889f, 
-0.352414f, -0.288276f, -0.184340f, -0.122741f, -0.243376f, - 0.031970f, -0.373402f, -0.396079f, 0.045566f, 0.072595f, -0.222681f, - -0.243802f, -0.340129f, -0.258494f, -0.192041f, -0.386112f, -0.240940f, - -0.047268f, -0.555802f, -0.032514f, -0.241341f, -0.167463f, -0.478308f, - -0.205936f, -0.316275f, 0.103729f, -0.197893f, -0.128029f, -0.218796f, - -0.167362f, -0.111814f, -0.126062f, -0.394260f, -0.025357f, -0.402697f, - -0.587395f, -0.400385f, -0.259664f, -0.415588f, -0.338503f, -0.399166f, - -0.270504f, 0.234505f, 0.272144f, 0.266938f, -0.392395f, -0.011717f, - -0.384221f, -0.473446f, -0.038420f, -0.241101f, -0.234402f, -0.275567f, - -0.410454f, -0.377599f, -0.179099f, -0.138432f, -0.248083f, -0.543026f, - -0.428043f, -0.239895f, -0.333193f, -0.103346f, -0.039038f, -0.171109f, - -0.119432f, -0.222351f, 0.000450f, 0.208724f, -0.510526f, -0.144656f, - -0.316721f, -0.344846f, -0.244794f, -0.129134f, -0.045634f, -0.400183f, - 0.043714f, -0.235414f, 0.115594f, -0.195616f, -0.106693f, -0.124242f, - 0.083990f, 0.049110f, -0.196130f, -0.059860f, -0.464235f, -0.516443f, - -0.101521f, -0.422379f, -0.413955f, -0.042991f, -0.345263f, -0.129264f, - -0.106911f, -0.140156f, -0.457841f, -0.199848f, -0.218954f, -0.329850f, - -0.364097f, -0.335262f, -0.312254f, -0.299331f, -0.052710f, -0.251019f, - -0.023459f, -0.222538f, 0.028849f, -0.088038f, -0.301550f, -0.273566f, - 0.067295f, -0.174608f, -0.445784f, -0.158366f, -0.567275f, -0.557652f, - -0.353503f, -0.302092f, -0.302049f, -0.551793f, -0.034535f, -0.225190f, - -0.210733f, -0.219377f, -0.057197f, -0.430933f, -0.025185f, -0.388150f, - -0.086147f, -0.430088f, 0.058466f, -0.152129f, -0.058411f, -0.236392f, - -0.547669f, -0.613849f, -0.893774f, -0.351715f, -0.399227f, -0.454909f, - -0.324501f, 0.000490f, -0.282167f, -0.073163f, -0.281452f, 0.047932f, - -0.175500f, 0.165220f, -0.276212f, 0.062153f, -0.217054f, -0.255487f, - -0.146416f, -0.097718f, -0.173809f, -0.559328f, -0.055695f, -0.391193f, - -0.132020f, -0.561184f, 
-0.308666f, -0.474053f, -0.219149f, -0.246558f, - -0.158325f, 0.151907f, -0.266835f, -0.144697f, -0.193960f, -0.046587f, - -0.220028f, -0.247355f, 0.135584f, 0.016511f, 0.367705f, -1.855877f, - 0.435622f, 0.444710f, -3.372301f, -3.030489f, 1.013267f, 0.380951f, - -0.170011f, -0.111415f, -0.456146f, -0.107254f, -0.095220f, -0.053078f, - -0.135864f, -0.591949f, -0.252810f, -0.324799f, -0.094796f, -0.260969f, - -0.391981f, -0.063170f, -0.336130f, -0.470127f, -0.405168f, -0.433219f, - -0.309563f, -0.295462f, -0.552270f, -0.012300f, -0.057793f, -0.034494f, - -0.446843f, -0.640160f, -1.188681f, -0.791361f, 0.543271f, 1.189112f, - 1.458468f, -0.005876f, -0.927475f, 0.062038f, -1.170818f, 0.338227f, - -3.007096f, -4.559296f, -4.045457f, -5.953635f, -0.228386f, -0.266890f, - -0.092595f, -0.377440f, -0.044534f, -0.053565f, -0.349268f, -0.415030f, - -0.310094f, 0.062721f, 0.251422f, -0.014350f, -1.282910f, 1.619560f, - 1.180566f, -0.032163f, -1.322951f, -0.603601f, 1.443710f, 0.654650f, - -0.393227f, 0.003536f, 0.029725f, -0.108925f, -0.053911f, 0.133977f, - -0.036145f, -0.168438f, 0.046989f, -0.331463f, -0.176983f, -0.311922f, - -0.272389f, -0.379592f, -0.399993f, -0.297873f, -0.193425f, -0.177524f, - -0.258309f, -0.567312f, -0.260217f, -0.241869f, 0.024010f, -0.032867f, - -0.039424f, -0.063670f, 0.193808f, -0.303514f, -0.013376f, -0.057761f, - 0.187922f, 0.006938f, 0.031810f, 0.180594f, -1.198427f, 2.820662f, - 0.154986f, -0.375518f, 0.116925f, -0.795782f, -0.085139f, -0.079365f, - -0.197936f, -0.321468f, -0.205271f, -0.558203f, -0.296235f, -0.151193f, - -0.158282f, -0.245402f, -0.208504f, -0.042335f, -0.087426f, -0.557129f, - -0.381427f, -0.441551f, -0.541011f, -0.060567f, -0.469305f, -0.032326f, - -2.453587f, -0.045568f, -0.296932f, 0.613061f, -0.320284f, 0.191620f, - -0.827145f, -0.225277f, 0.275800f, 1.696635f, -}; - -static const float av1_ab_partition_nn_bias_32_layer0[64] = { - -0.176206f, 0.660189f, -0.186156f, -2.481963f, -1.564218f, -0.280424f, - 0.732684f, 
-0.135581f, -2.193132f, -0.172771f, 0.605001f, -0.060392f, - -0.067190f, -0.132969f, -1.410812f, -0.298701f, -0.105963f, -0.086173f, - 0.632779f, 0.005585f, 1.310169f, 1.392136f, -0.563860f, -0.051053f, - 0.660998f, -0.214726f, -1.894342f, -0.128288f, -0.330721f, -0.053988f, - -0.177726f, 1.200859f, -0.178902f, -0.172620f, -0.184476f, -0.175559f, - 0.538503f, -0.322158f, -0.219080f, -0.058208f, -0.171347f, -0.216060f, - -0.174950f, -0.295740f, -0.184820f, -0.213896f, 1.317728f, -0.020116f, - -0.208096f, 0.000000f, 1.246166f, -0.225421f, -0.181555f, 0.861761f, - 1.172429f, -0.172892f, -0.737092f, -0.189904f, -0.179385f, -0.114618f, - -1.384604f, -0.201713f, -0.271948f, 0.372351f, -}; - -static const float av1_ab_partition_nn_weights_32_layer1[64 * 16] = { - -0.037828f, 1.529029f, 0.004927f, 1.475763f, 0.627172f, 0.325872f, - -0.990757f, 0.129476f, 0.889958f, -0.082031f, 0.332133f, 0.074422f, - -0.176212f, -0.074355f, 0.774378f, 0.110987f, -0.155469f, 0.253310f, - 0.882538f, 0.253605f, 0.332436f, -5.389474f, 0.278470f, 0.168644f, - 0.914611f, 0.154165f, 0.809262f, -0.174734f, 0.923673f, 0.064716f, - -0.070228f, -0.228735f, 0.002312f, 0.112222f, -0.045502f, -0.046004f, - 0.514101f, 0.306480f, 0.021232f, -0.015955f, -0.288260f, 0.189177f, - -0.104158f, 0.103273f, 0.096910f, -0.086328f, 1.327289f, -0.154247f, - 0.056676f, -0.243327f, -0.646676f, 0.177221f, -0.086761f, 0.729729f, - -14.710893f, -0.044881f, 0.339003f, -0.134737f, 0.073621f, -0.162913f, - 1.215237f, 0.140723f, 0.138630f, 1.241719f, 0.204092f, -0.463080f, - -0.176086f, 1.125868f, 1.034814f, 0.225455f, -0.203421f, -0.078787f, - -0.527498f, 0.012491f, -0.563307f, -0.170792f, 0.002679f, 0.116153f, - 0.211348f, -0.191900f, -0.212505f, 0.263445f, -0.074679f, -0.081441f, - -0.815405f, 2.448215f, 0.781299f, 0.149542f, -1.045162f, 0.043014f, - 0.217381f, -0.094500f, -0.090427f, 0.025784f, -0.228906f, -2.741798f, - 0.230475f, -0.256112f, -0.103297f, 0.159121f, -0.229793f, -0.014883f, - -0.104131f, -0.123816f, 
0.164148f, -0.052279f, -0.071845f, -0.041197f, - 0.208527f, -0.234197f, -0.542336f, 0.020053f, 0.088870f, 0.014346f, - 2.502164f, -0.010244f, -0.267792f, 0.844394f, 2.711486f, -0.015262f, - -0.868053f, -0.295704f, 0.222289f, -0.000286f, -0.352098f, -0.079000f, - 0.021267f, -0.721739f, -0.240558f, -0.384775f, 0.065974f, -2.161058f, - 0.195889f, 0.268966f, -0.009329f, 0.014949f, 0.314943f, 0.235885f, - 0.072591f, -0.127120f, 0.150784f, 0.105697f, -1.297403f, -0.207509f, - -0.217688f, -0.076752f, 0.170952f, -0.294235f, 0.449973f, -1.712690f, - 0.860989f, 0.054757f, -0.812627f, -0.105316f, -0.736230f, -0.133192f, - -3.741608f, 0.495660f, -0.288936f, 4.654852f, -0.021305f, -0.308916f, - 0.049205f, -0.259996f, 0.114248f, -0.252647f, -0.253180f, -0.449314f, - 0.022979f, 0.063281f, -0.196154f, 0.078295f, -0.322317f, -0.145142f, - 0.300573f, 0.048385f, -0.254787f, 0.123939f, -1.263088f, -0.228565f, - -0.389061f, 0.391084f, 2.322438f, 0.075009f, 0.225743f, -0.198808f, - -0.280538f, -0.173939f, -0.120543f, -0.070792f, -0.417187f, -0.781056f, - -0.102756f, -1.760965f, 0.019149f, -0.867342f, 0.347141f, 0.031588f, - 0.302572f, -0.203573f, -0.357320f, -0.096078f, -0.527528f, 0.046699f, - -0.108561f, -0.167077f, -2.851509f, -0.307116f, 0.202720f, -0.160280f, - -0.215525f, 0.064355f, -0.427220f, 1.516230f, 0.634453f, 0.099400f, - -1.013887f, -0.029740f, -0.093426f, -0.044272f, -1.297636f, -0.237614f, - -0.160953f, 0.399036f, -0.030685f, -0.113619f, -0.184704f, 0.040519f, - -0.588252f, -0.210235f, -0.067623f, -0.031841f, -0.107261f, -0.192582f, - -0.253959f, -0.430821f, -0.103184f, -0.280185f, -0.357723f, 0.197761f, - -0.175087f, -0.055171f, 1.642014f, -0.192559f, -0.288147f, 0.610311f, - 4.688195f, -0.128728f, -0.914869f, -0.108286f, 0.013789f, 0.092125f, - 0.019770f, -0.178386f, 0.074164f, -1.152658f, -0.216738f, -0.277286f, - 0.012381f, 0.418259f, -0.680727f, -0.221917f, -0.485946f, 0.101672f, - 2.009457f, 0.054302f, 1.019838f, -0.116170f, 0.165134f, -0.112567f, - 0.852632f, 
-0.385796f, -0.108666f, 0.053181f, -0.311797f, -0.372875f, - -0.675717f, 2.409268f, -0.514720f, -0.214245f, -0.646596f, 0.009756f, - 0.203993f, 0.093617f, -0.301290f, 0.253551f, -0.128909f, -1.448442f, - -0.186823f, -0.278001f, -0.294993f, -0.176928f, -0.473605f, 0.062049f, - -0.212084f, -0.137326f, 0.012505f, 0.087850f, -0.200413f, -0.394119f, - -0.132224f, 0.146917f, 0.155746f, 0.198725f, -0.322541f, 0.196391f, - -0.945500f, 0.036736f, -0.155646f, -0.677341f, 1.130545f, -0.339554f, - 0.411628f, -0.355813f, -0.249843f, 0.213694f, -2.035607f, 0.055694f, - -0.111669f, 0.408696f, -0.067043f, -0.048182f, 0.398110f, -0.067542f, - 1.459801f, 0.236833f, -0.178806f, 0.168758f, 0.492387f, 0.099691f, - -0.776680f, -0.172865f, 0.204225f, 0.193982f, 0.575685f, -0.062248f, - 0.011486f, 0.058571f, -0.493391f, 0.026893f, -0.900467f, 3.793129f, - -0.634613f, -0.064660f, -0.048262f, 0.361905f, 0.033641f, 0.245171f, - -0.064671f, 0.034954f, 0.204358f, -0.904023f, -0.052714f, -0.250134f, - 0.136700f, 0.000734f, -0.371720f, 0.226483f, 0.217958f, 0.060559f, - 0.180111f, 0.000970f, 0.079556f, -0.096775f, 0.093855f, -0.026224f, - -0.243664f, 0.004290f, 0.123281f, -0.239476f, 1.230374f, -0.107826f, - -0.101982f, -0.153917f, 5.464427f, 0.304375f, -0.809957f, 0.090564f, - -0.278416f, -0.245555f, -2.078421f, 0.243093f, -0.127666f, 0.052451f, - -0.126662f, -0.783505f, 0.025149f, -1.422675f, -0.207769f, -0.362547f, - 0.115310f, 0.133390f, 1.264754f, -0.027055f, -0.485312f, -0.240717f, - -0.239722f, 0.146818f, -1.265043f, -0.235553f, 0.267104f, -0.021357f, - -0.435949f, -0.309371f, 0.049920f, 1.302721f, -0.233978f, -0.097551f, - -0.240631f, -0.287821f, -0.378380f, -0.273131f, -3.075169f, 0.226404f, - -0.029361f, 2.703590f, -0.430659f, 0.067927f, -0.387520f, -0.370630f, - -0.229236f, 0.085653f, -0.370956f, -0.065556f, -0.187859f, 0.068309f, - -0.109299f, -0.259898f, -0.103644f, -0.271199f, -0.209350f, 0.140993f, - -0.196713f, -0.135508f, -1.423209f, -0.406385f, -0.019956f, -0.864694f, - 
5.963707f, -0.201157f, 0.726377f, -0.011076f, 0.010553f, -0.102918f, - -2.230088f, -0.258098f, -0.039547f, -0.029262f, -0.082324f, -0.860222f, - -0.094735f, -1.381839f, 0.587298f, -0.173048f, 0.721360f, 0.241900f, - 0.764302f, -0.023609f, -1.173755f, 0.103912f, -0.185363f, 0.078435f, - -2.245062f, -0.127269f, 0.202234f, 0.158975f, -0.260909f, 0.098608f, - -0.348247f, 1.732502f, -0.412298f, -0.269602f, -0.425771f, -0.146243f, - -0.530730f, 0.125716f, -1.004419f, 0.145109f, -0.059289f, 1.096304f, - 0.012891f, 0.045033f, -0.306875f, 0.003514f, -0.176110f, 0.037544f, - -0.441537f, -0.518921f, -0.262149f, -0.060407f, -0.379419f, -0.141245f, - -0.128894f, -0.176537f, -1.161318f, -0.249100f, -0.118330f, 0.042816f, - 1.173404f, 0.088312f, -0.393568f, -0.175134f, 6.529819f, -0.326652f, - -0.631917f, -0.393476f, 0.057781f, -0.217748f, -1.781139f, -0.012614f, - -0.212621f, -0.720322f, -0.218498f, -0.388556f, -0.254796f, -0.248399f, - -0.608744f, -0.265146f, 0.238517f, 0.066882f, -2.916806f, 0.054642f, - 0.282590f, 0.075248f, 0.010188f, -0.133486f, 0.985945f, -0.045849f, - -0.347564f, 0.057320f, -0.417920f, 0.063664f, 0.387062f, -2.692059f, - -0.535549f, 0.263736f, 0.327889f, -0.070273f, -0.775254f, 0.147250f, - 3.309425f, -0.212191f, -0.067204f, -2.912663f, -0.061496f, 0.084233f, - 0.022907f, 0.138421f, -0.112159f, -0.288447f, -0.010799f, 0.056049f, - -0.036527f, 0.021525f, 0.106649f, -0.291883f, 0.088424f, -0.057773f, - -0.086031f, 0.015277f, -0.318505f, -0.269049f, -1.008913f, -0.224785f, - -0.025820f, -0.649037f, 0.706381f, 0.096410f, 0.643776f, -0.046743f, - -0.009654f, -0.024246f, 1.469255f, -0.183536f, -0.370046f, -0.048442f, - -0.376527f, -0.431264f, -0.245109f, -0.093951f, 0.203683f, -0.099872f, - 0.087210f, 0.160692f, -3.527694f, -0.068891f, -0.228994f, -0.231817f, - -0.241949f, 0.193613f, 0.979597f, -0.091259f, 0.414424f, -0.047341f, - -0.209582f, -0.295134f, -0.016824f, 0.460327f, -0.072671f, 0.246234f, - 0.235896f, 0.127238f, -1.068683f, 0.035648f, 2.254888f, 
0.180105f, - -0.260098f, -2.322120f, -0.184249f, -0.314801f, -0.099969f, -0.272117f, - -0.237916f, 0.031103f, -0.274063f, -0.049384f, -0.044917f, 0.102477f, - -0.342148f, -0.257558f, -0.346300f, 0.115333f, -0.115456f, 0.208354f, - -0.359301f, -0.167395f, 1.146514f, -0.177861f, -0.098658f, -0.444570f, - 6.759993f, -0.369772f, -0.831118f, 0.001866f, -0.073298f, -0.072095f, - 0.811902f, -0.431997f, -0.286587f, -0.269500f, 0.111492f, -0.525364f, - -0.351785f, -2.463474f, -1.852659f, 0.135325f, 0.138267f, 0.100643f, - -2.373278f, -0.285514f, -0.395388f, -0.185016f, -0.030249f, -0.005767f, - -0.716424f, -0.031674f, 0.011147f, 0.057405f, -0.215873f, -0.094401f, - 0.573528f, -1.223820f, 0.414852f, -0.059053f, -0.076488f, -0.287168f, - -0.842640f, 0.174084f, -0.567186f, 0.336629f, -0.062514f, 2.075448f, - -0.061680f, -0.131529f, -0.098994f, -0.204111f, -0.347865f, 0.108516f, - -0.049616f, -0.069212f, -0.273935f, -0.096545f, -0.210784f, -0.284698f, - 0.141501f, -0.176924f, -0.361341f, -0.251197f, -0.286694f, 0.245569f, - -1.521661f, -0.122639f, -0.015760f, -0.718912f, 5.877828f, 0.146916f, - 0.151767f, 0.220785f, -0.032298f, 0.230902f, 0.663943f, -0.252613f, - 0.057718f, -0.436038f, -0.323994f, -1.139787f, -0.042489f, -1.326298f, - -1.031206f, -0.104136f, 0.389897f, 0.127602f, -2.667789f, -0.212366f, - -0.506262f, -0.009115f, -0.213202f, 0.076167f, -1.629405f, 0.055129f, - 0.375393f, -0.150272f, -0.241515f, -0.326497f, 0.100069f, 0.410703f, - 0.340622f, 0.042437f, -0.349945f, 0.041176f, -1.178950f, 0.030992f, - 0.933908f, -0.035844f, -0.098660f, 1.030584f, -0.092043f, -0.355739f, - -0.305562f, 0.036161f, -0.049558f, -0.033225f, -0.403856f, -0.088276f, - 0.215493f, -0.149105f, -0.013363f, 0.025886f, -0.101306f, -0.205781f, - -1.072487f, -0.076019f, 0.077555f, 0.131003f, 1.267763f, -0.008954f, - -0.327617f, -0.246539f, 6.664081f, -0.404403f, -1.442489f, 0.191301f, - -0.336361f, 0.181156f, 0.833108f, 0.007879f, -0.194464f, -1.029408f, - -0.036268f, -0.927110f, -0.379190f, 
-0.293443f, -1.848579f, -0.242548f, - -0.065990f, 0.203160f, -0.291788f, 0.000680f, 0.587011f, -0.241289f, - 0.037034f, 0.000552f, 1.072308f, -0.387230f, -0.230050f, 0.292322f, - -0.720001f, 0.034109f, -0.467260f, 2.211644f, -1.839191f, -0.048797f, - -0.083469f, -0.334686f, -0.269056f, 0.051295f, 1.319904f, -0.035603f, - -0.018457f, -0.824915f, -0.212285f, -0.230516f, -0.035093f, -0.400843f, - -0.305469f, -0.099011f, 0.014225f, -0.452772f, 0.170331f, -0.389312f, - -0.115084f, -0.014770f, -0.429387f, -0.155961f, -0.568200f, -0.037853f, - -0.125137f, 0.067228f, -1.329271f, -0.117874f, -0.132499f, -0.218376f, - -0.588325f, -0.320024f, 0.085695f, -0.235047f, -0.217790f, 0.103015f, - -0.698644f, 0.017766f, -0.058299f, 0.199411f, -0.122485f, -0.563949f, - -0.349011f, -0.557045f, -0.131165f, 0.002281f, 0.118559f, -0.210302f, - -1.153815f, 0.116738f, -0.236007f, -0.003487f, -0.006885f, -0.244816f, - 0.953222f, 0.093748f, 0.266869f, 0.241869f, -0.860832f, -0.387012f, - -0.338986f, 2.097515f, -1.942512f, -0.298021f, 0.543911f, -0.043214f, - 0.082125f, -0.120242f, 0.712231f, 0.213327f, -0.301687f, -0.544011f, - -0.392131f, 0.004302f, 0.004825f, -0.317440f, -0.107518f, -0.293407f, - -0.159111f, -0.080367f, 0.132663f, -0.017726f, -0.237521f, -0.190297f, - -0.361633f, 0.200518f, -0.538296f, -0.027975f, -0.381704f, -0.016963f, - 0.630105f, -0.190997f, -0.287840f, -0.603488f, 3.605598f, -0.276614f, - -1.346383f, 0.186912f, -0.047575f, -0.189232f, -1.519072f, 0.097816f, - -0.223722f, 0.304924f, -0.213022f, -1.052433f, -0.322283f, -1.706734f, - -2.458027f, 0.237976f, 0.171050f, -0.103139f, -0.278689f, 0.329824f, - -0.262448f, -0.122916f, -0.236398f, -0.013848f, -0.969160f, -0.374907f, - 0.091018f, -0.386471f, -0.723940f, 0.064956f, -0.057652f, 1.321024f, - -1.397418f, -0.143136f, 0.272468f, -0.030749f, 0.037324f, 0.069316f, - -0.904925f, -0.333693f, -0.117709f, 2.279598f, -0.428065f, -0.131157f, - -0.014288f, -0.402862f, -0.666090f, 0.017070f, -0.028333f, 0.002481f, - 0.197156f, 
-0.038120f, -0.271062f, -0.188275f, -0.021370f, -0.070849f, - -0.905007f, -0.095886f, -0.093055f, -0.121821f, -1.239812f, -0.411799f, - -0.089948f, -0.936827f, 1.437569f, -0.388908f, 0.126170f, 0.186162f, - -0.018819f, -0.138364f, -1.066412f, -0.138222f, -0.022186f, 0.107331f, - -0.230436f, -1.352605f, -0.161323f, -1.081810f, -0.933825f, -0.136675f, - 0.378157f, 0.113377f, -0.850610f, 0.080245f, -0.087305f, -0.002852f, - 0.044408f, -0.188172f, -1.891998f, 0.092189f, 0.125325f, -0.105090f, - -0.848510f, -0.396308f, -0.384130f, 2.007509f, -1.480787f, -0.126946f, - 0.314767f, 0.000195f, -0.285628f, -0.110442f, -0.293948f, 0.258559f, - -0.417603f, 1.570705f, 0.092459f, -0.340974f, -0.284754f, -0.007801f, - -0.324610f, -0.004734f, -0.207716f, -0.057175f, 0.055467f, -0.210830f, - -0.113005f, -0.299177f, 0.068074f, 0.017929f, -2.897598f, -0.260074f, - -0.014422f, -0.206467f, 1.246997f, -0.372863f, -0.214160f, -0.114035f, - 5.805862f, 0.003611f, -1.340990f, -0.021085f, -0.260431f, -0.002720f, - -1.251640f, -0.353531f, -0.304009f, -0.153376f, -}; - -static const float av1_ab_partition_nn_bias_32_layer1[LABEL_SIZE] = { - -0.521497f, -1.061572f, -0.078756f, -0.660662f, -0.403741f, -0.960163f, - 0.001427f, 0.523607f, 0.225068f, -0.055273f, 1.019519f, 1.181880f, - -0.010198f, 0.130597f, 1.276752f, 2.028188f, -}; - -static const NN_CONFIG av1_ab_partition_nnconfig_32 = { - FEATURE_SIZE, // num_inputs - LABEL_SIZE, // num_outputs - 1, // num_hidden_layers - { - 64, // num_hidden_nodes - }, - { - av1_ab_partition_nn_weights_32_layer0, - av1_ab_partition_nn_weights_32_layer1, - }, - { - av1_ab_partition_nn_bias_32_layer0, - av1_ab_partition_nn_bias_32_layer1, - }, -}; - -// nn model for ab partition pruning, 16x16. 
-static const float av1_ab_partition_nn_weights_16_layer0[FEATURE_SIZE * 64] = { - 0.151902f, 0.007947f, -1.788454f, 0.431869f, -2.971387f, 0.923566f, - 1.632542f, -1.665136f, -0.338632f, -5.075884f, 0.398267f, 0.030467f, - 2.263534f, -0.045532f, -1.066128f, 0.915139f, -0.560500f, -3.293125f, - 2.072793f, -1.011414f, 0.122716f, -0.060169f, -0.388860f, 0.031019f, - -0.381861f, 0.001551f, -0.328472f, 0.038296f, -0.060398f, -0.375556f, - 0.209226f, 0.014764f, -1.443469f, -0.345486f, 2.409269f, 1.524846f, - -0.640666f, 1.322139f, -2.074771f, -0.580944f, -0.203960f, -0.072893f, - 0.329701f, 0.115339f, -1.339542f, 0.249024f, -0.421545f, -0.409151f, - -0.258293f, 0.836288f, -0.073685f, -0.009624f, 0.895712f, 0.320639f, - 0.451002f, -1.544558f, 0.193709f, -1.389012f, 1.305451f, 0.089795f, - 0.050338f, -0.017433f, -0.304667f, 0.500729f, 0.504346f, 0.073757f, - 0.582649f, -0.993623f, 1.766766f, -3.067265f, -0.415774f, -0.006036f, - -1.245281f, 0.253205f, -0.591245f, -0.626238f, 0.551852f, 0.593755f, - 0.491023f, 1.099384f, -0.348448f, 0.054564f, -0.451422f, -0.375781f, - -0.248390f, -0.052548f, -0.380069f, -0.165391f, -0.297968f, -0.052142f, - -0.316381f, -0.045246f, -0.243905f, -0.034169f, -0.247523f, -0.180773f, - 0.068066f, -0.374920f, 0.057536f, -0.189748f, 0.058375f, -0.267749f, - -0.147286f, -0.246153f, 0.006183f, -0.202029f, -0.059128f, 0.116852f, - 0.134719f, -0.126900f, -0.064646f, -0.196458f, -0.182331f, 0.108029f, - -0.264499f, 0.155816f, -0.107255f, -0.056983f, -0.209771f, -0.099070f, - 0.007313f, -0.254124f, -0.231964f, -0.275972f, 0.032098f, -0.264564f, - -0.208743f, 0.155599f, -0.121511f, -0.156145f, -0.162315f, -0.059788f, - -0.257073f, -0.076654f, -0.110616f, -0.321675f, -0.051952f, 0.006301f, - -0.154114f, 0.017032f, -0.017364f, -0.233247f, 0.009918f, -0.179289f, - -0.190722f, 0.147106f, -0.063910f, -0.396872f, -0.263123f, -0.003850f, - -0.040718f, -0.324699f, 0.118660f, -0.170727f, -0.316788f, 0.100886f, - -0.202842f, 0.045371f, 0.150561f, -0.057054f, 
-0.308150f, 0.028346f, - -0.381473f, -0.195365f, 0.026221f, -0.281795f, 0.087204f, 0.047689f, - -0.027643f, -0.104724f, -0.089030f, -0.117661f, -0.349160f, 0.056982f, - -0.340273f, 0.048086f, 0.046103f, -0.121527f, 0.021697f, 0.054109f, - -0.002768f, -0.008461f, -2.297240f, 0.124651f, 3.621661f, -0.057120f, - -1.151656f, 2.296894f, -3.678720f, -0.290240f, 0.087683f, -0.186389f, - 0.007656f, -0.090236f, -0.245217f, 0.110389f, -0.251719f, -0.029084f, - -0.128203f, -0.100005f, -0.032779f, 0.007281f, -0.366596f, -0.267870f, - -0.215620f, 0.047687f, 0.010303f, 0.097980f, -0.191569f, -0.341162f, - 0.119249f, 0.026279f, -2.161546f, 0.459591f, 1.290566f, 1.791797f, - -0.409835f, 0.127081f, -1.156367f, 0.198286f, 0.099561f, -0.067445f, - -0.034352f, 0.017966f, -0.277380f, -0.057220f, -0.174198f, -0.014164f, - 0.146090f, -0.357530f, 0.097644f, -0.000932f, 0.446603f, -0.066793f, - 2.448620f, 0.937617f, -1.232922f, 0.313183f, 0.816827f, -0.275115f, - -0.245205f, -0.126895f, 0.156668f, -0.186977f, -0.273505f, 0.013315f, - 0.168629f, -0.089084f, 0.006166f, -0.116107f, -0.199316f, -0.024010f, - -0.242303f, 0.011612f, -0.218485f, -0.229661f, -0.123922f, 0.136699f, - 0.006732f, -0.148718f, -0.164225f, 0.116063f, 1.587898f, 0.690519f, - 0.360566f, 0.009739f, -0.678702f, -0.046003f, 0.126984f, 0.605212f, - 1.240663f, -0.000228f, -1.119369f, -0.415589f, -0.721003f, 0.097936f, - -1.410586f, -2.358833f, -2.773129f, -3.983361f, -0.087144f, -0.050029f, - -0.242255f, 0.137424f, -0.307490f, -0.084637f, -0.023812f, -0.196582f, - -0.078695f, 0.038257f, -0.012110f, -0.263521f, 0.009839f, -0.109125f, - -0.226036f, 0.060712f, 0.093671f, 0.153143f, 0.039116f, -0.290891f, - 0.227057f, -0.204633f, -0.207539f, -0.148242f, 0.046204f, -0.231268f, - -0.209315f, -0.307579f, -0.436556f, 0.023475f, 0.131793f, -0.038301f, - 1.650584f, 0.392570f, 1.446576f, 1.254380f, -0.516867f, -0.057116f, - 0.149320f, 0.414424f, -0.246309f, 0.003877f, -0.480238f, -1.037035f, - -0.830779f, -1.122244f, -0.408267f, 
-0.253956f, 0.382005f, 0.940609f, - -1.113370f, -0.018554f, 0.141064f, -0.182504f, 1.270707f, 0.414904f, - -0.216036f, 0.203831f, 0.450716f, -0.452909f, 0.139358f, -0.027143f, - 1.956892f, 1.643732f, -0.867839f, -0.620520f, -0.334607f, -0.519982f, - 0.205023f, 0.661159f, -0.000809f, 0.049033f, -0.348579f, -0.200338f, - -0.362144f, -0.346590f, -0.230096f, 0.180746f, -0.149954f, -0.253429f, - -0.378170f, -0.040724f, -0.041597f, 0.243659f, -0.472181f, 0.015401f, - -0.180376f, 0.153139f, -0.247738f, -0.010485f, -0.157158f, 0.016825f, - -0.238925f, -0.265798f, -0.318374f, 0.142352f, -0.210520f, 0.051928f, - -0.352190f, -0.179052f, -0.185498f, 0.025540f, -0.111667f, -0.235187f, - -0.215454f, 0.010931f, -0.238372f, -0.126659f, 0.075691f, -0.091167f, - -2.462379f, -0.007950f, -0.637990f, 0.285554f, -0.051275f, 0.282279f, - -0.744083f, -0.570646f, 0.592198f, 1.421332f, -0.256027f, -0.140315f, - 0.160247f, -0.063185f, -0.055895f, -0.199864f, -0.287353f, -0.074561f, - -0.071228f, 0.055864f, -1.084764f, -0.263409f, 0.779266f, 0.228187f, - 0.375013f, 0.121204f, -0.656948f, 0.533561f, 0.272671f, -0.015423f, - -0.124180f, -0.009127f, 2.934838f, -0.150998f, 1.163152f, 0.081997f, - -4.715939f, -3.676595f, -1.524886f, -0.167593f, 0.281186f, 0.024046f, - -1.451709f, 0.332558f, 0.990504f, 0.376290f, -1.466773f, -0.448439f, - -2.929108f, -4.255188f, 0.065238f, 0.019950f, 1.372393f, 0.444052f, - -2.538772f, 1.579767f, -0.464911f, -1.866114f, 1.053958f, 0.434467f, - -0.125964f, 0.034671f, 0.077116f, -0.138466f, -0.413395f, -0.223453f, - -0.172127f, -0.251265f, -0.048239f, -0.395519f, 0.023141f, 0.037459f, - -0.249593f, -0.062215f, -0.047209f, -0.435189f, -0.164155f, -0.077590f, - -0.241164f, -0.126128f, -0.038243f, -0.180888f, 0.198840f, -0.328036f, - -0.169790f, 0.036506f, 0.052572f, -0.183570f, -0.073617f, -0.244959f, - 0.266498f, 0.032846f, -1.902106f, 0.486078f, 2.414993f, 0.975182f, - -0.382875f, 1.647810f, -2.197017f, -0.890107f, 0.221287f, 0.010889f, - 3.817042f, 0.572728f, 
0.092466f, 0.473337f, -1.634659f, -1.069455f, - 1.486776f, -1.023850f, 0.088184f, 0.008842f, 0.518202f, 0.270259f, - 1.757191f, -0.121839f, -2.912229f, -1.250866f, -2.381808f, 0.335309f, - -0.120079f, -0.061294f, -0.058725f, -0.315169f, -0.262443f, 0.072434f, - -0.267836f, -0.319354f, -0.274975f, 0.068970f, -0.406467f, 0.044074f, - -0.152311f, -0.333656f, -0.228355f, -0.185613f, 0.017346f, -0.177674f, - -0.090675f, -0.102047f, -0.011768f, -0.025280f, -0.271661f, 0.098099f, - -0.312272f, -0.222217f, -0.100548f, 0.106260f, -0.034655f, 0.135109f, - -0.021276f, 0.018177f, -0.353097f, -0.011128f, 0.061136f, -0.511662f, - -0.223236f, -0.308841f, 0.118789f, -0.154628f, -0.053178f, -0.055973f, - 0.013175f, -0.368337f, -0.090863f, -0.116920f, 0.178990f, -0.025278f, - -0.190553f, -0.238092f, 0.303943f, -0.024944f, 0.719373f, 0.384332f, - -0.378480f, -0.423316f, 0.709922f, 0.758514f, -1.559023f, -2.503173f, - 0.068652f, -0.234741f, -0.182932f, 0.037878f, 0.020684f, -0.174142f, - -0.182300f, -0.052796f, -0.219145f, 0.113028f, -1.041826f, 0.035317f, - 0.919904f, -0.676011f, 0.652297f, 1.456447f, -0.166904f, -0.861823f, - 0.895827f, 0.429821f, -0.180376f, -0.076587f, -0.273945f, -0.288990f, - -0.206692f, -0.080745f, -0.085444f, 0.186953f, -0.050135f, 0.044243f, - -0.391706f, -0.160498f, -0.292268f, 0.164060f, 0.412649f, 0.211611f, - -0.327294f, -0.919399f, 0.320297f, 0.385284f, -0.088848f, -0.072556f, - -0.384813f, -0.176267f, -0.065918f, 0.134724f, -0.231104f, -0.337707f, - -0.195442f, -0.263569f, 0.098090f, -0.341411f, -0.189211f, -0.439276f, - -0.404046f, 0.262491f, -0.311093f, -0.086454f, -0.013400f, -0.061447f, - -0.026945f, -0.112036f, -0.322985f, 0.078500f, -0.230205f, -0.344535f, - -0.021087f, 0.110220f, -0.128671f, 0.044219f, -}; - -static const float av1_ab_partition_nn_bias_16_layer0[64] = { - 2.936406f, -0.396539f, -0.110456f, -1.254954f, 0.785350f, 0.516290f, - -0.172341f, 0.254386f, -0.192465f, -0.106751f, -0.055518f, -0.094994f, - 0.000000f, -0.065018f, 
-0.004908f, -0.130483f, -0.119580f, -0.142072f, - 0.457446f, -0.125051f, -0.107712f, 0.714607f, -0.140809f, -1.788650f, - -0.087199f, 0.000000f, -1.290050f, 0.443930f, -0.110634f, -0.109380f, - -0.188213f, -1.414179f, 1.193579f, 0.388775f, -0.873193f, -0.110050f, - -0.072565f, -0.117050f, -0.119132f, 0.456959f, -0.132069f, 0.131974f, - 1.160474f, 1.746465f, 0.442628f, -0.188849f, -0.207794f, -0.108364f, - -0.856655f, -2.141620f, 0.335476f, -0.105508f, -0.212162f, -0.109319f, - -0.237213f, -0.109980f, -0.291044f, -0.137877f, 0.470191f, -0.023908f, - 0.123809f, -0.109797f, 0.200510f, -0.147542f, -}; - -static const float av1_ab_partition_nn_weights_16_layer1[64 * LABEL_SIZE] = { - -6.823716f, 1.406568f, -0.144009f, 2.228765f, 0.838336f, 0.738107f, - -0.319014f, -0.148756f, 0.240862f, -0.111089f, -0.004241f, 0.025758f, - -0.193820f, -0.246362f, -0.181363f, -0.201556f, 0.024268f, 0.252994f, - -0.289443f, 0.194932f, 0.057467f, 0.724735f, 0.014063f, 1.361352f, - 0.025191f, 0.024274f, 0.231462f, -7.227959f, -0.094515f, 0.039946f, - 0.412719f, 0.812318f, 3.038903f, -0.286289f, 0.647482f, -0.115114f, - 0.053590f, 0.066069f, 0.153134f, 0.996250f, -0.125700f, 0.951365f, - -6.243494f, -4.827697f, 0.566320f, 0.239515f, -0.099702f, 0.054546f, - 1.847330f, 3.680076f, -3.049829f, -0.127709f, 0.068469f, -0.017794f, - 0.223864f, -0.106778f, -0.020425f, -0.040226f, -0.251890f, -0.168673f, - -0.552073f, 0.043311f, 0.218668f, 0.033209f, -3.199210f, 0.193079f, - 0.321406f, 0.718307f, -0.181418f, -0.459612f, -1.981170f, 0.968496f, - -0.029757f, -0.130065f, 0.043782f, 0.072394f, -0.088686f, 0.025322f, - 0.129882f, 0.101324f, 0.335707f, 0.072714f, -2.079774f, 0.203997f, - 0.239321f, -0.301757f, 0.257845f, 1.288382f, -0.031275f, -0.234194f, - 0.310722f, 2.045469f, 0.034716f, 0.135638f, -0.251388f, 0.320071f, - -1.065301f, -0.322731f, -0.545028f, 0.226276f, 0.090799f, 0.019289f, - 0.048950f, -1.079300f, 0.231938f, 0.083683f, 4.762127f, 0.145037f, - -0.145549f, 0.075592f, 0.172336f, 
0.108175f, 0.333751f, 1.090501f, - 1.056114f, 0.047073f, 0.182052f, -0.081587f, 0.089900f, 0.339286f, - 2.049988f, 0.073585f, 0.537355f, -0.243322f, -0.010179f, -0.052601f, - -0.174915f, 0.117793f, 2.222990f, -2.520837f, -0.092699f, 1.199887f, - 0.138720f, 0.679918f, -0.463155f, -0.659496f, -0.109913f, -0.003398f, - 0.114633f, -0.128377f, 0.092970f, -0.107489f, -0.191078f, 0.185182f, - 0.216980f, -0.019343f, 3.443133f, 0.287953f, 0.099314f, 0.985958f, - 0.157268f, -0.606516f, 0.049418f, -0.221809f, -0.453081f, -0.344796f, - -0.003735f, -0.107269f, -0.128541f, -0.259543f, -0.934806f, -0.542456f, - -1.011192f, 0.022795f, 0.186363f, -0.076356f, -0.050932f, -0.165098f, - 0.168177f, -0.101596f, -5.270886f, 2.553943f, -0.440870f, -0.017494f, - 0.215208f, -0.017032f, 1.495915f, -4.304677f, 0.762211f, 0.182937f, - 0.254406f, -0.029433f, -0.088364f, -0.110160f, -0.108257f, -0.036538f, - 0.737697f, -0.234989f, 0.168095f, 0.245118f, -0.077262f, 0.195718f, - 0.753302f, -1.637869f, 0.126227f, 0.982129f, -0.121444f, -0.295570f, - -1.215799f, 0.147867f, -0.068496f, 0.132726f, -0.005772f, -0.181774f, - 0.126513f, 0.204723f, -0.366123f, 0.103906f, -0.148053f, -0.075272f, - 0.243884f, -0.104828f, 0.198988f, 0.501034f, -0.112671f, 0.111421f, - 0.167508f, -0.117803f, -0.738624f, 2.046292f, 0.124011f, 0.057983f, - -0.359154f, -0.648883f, -0.259462f, -0.459041f, -2.501223f, -0.065138f, - 0.122417f, 0.060291f, -0.129033f, -0.843086f, 0.268241f, -0.399927f, - 1.585888f, 1.816393f, -0.631427f, 0.127826f, 0.088105f, 0.073488f, - 0.717694f, -1.497362f, 2.608528f, 0.066896f, -0.079230f, 0.223436f, - -0.010530f, 0.175310f, 1.120365f, 0.034391f, 0.835312f, 0.071652f, - -0.080615f, 0.111395f, 0.162742f, 0.079927f, -3.859582f, -0.638431f, - -0.167880f, -0.992659f, -0.885355f, -1.276197f, 1.334344f, 0.931940f, - -0.078244f, -0.149030f, -0.070974f, -0.133566f, 0.200034f, 0.102793f, - -0.048546f, 0.063545f, 0.023864f, -0.190863f, 1.934257f, -0.136286f, - -0.107916f, -0.637468f, 0.066449f, 1.089693f, 
-0.214047f, -0.265780f, - 0.899660f, -0.130333f, 0.288311f, -0.049024f, 0.090202f, 0.487969f, - 0.339704f, 0.858479f, 0.841253f, -0.184100f, -0.637070f, -0.125071f, - -0.077650f, -0.087877f, 0.202268f, -0.027300f, 2.842862f, -0.100698f, - -0.259080f, 0.260556f, 0.157912f, -0.070364f, 0.467190f, 1.200037f, - 1.419317f, -0.033588f, -0.227824f, 0.292617f, 0.228574f, 0.213839f, - -1.091099f, -0.022258f, -1.294681f, 0.136118f, 0.081652f, -0.185359f, - -0.039706f, 0.191407f, -2.053219f, -0.261934f, 0.047812f, -0.029536f, - -0.823869f, -1.090534f, -0.755890f, 0.441035f, -0.167945f, 0.231441f, - -0.135013f, -0.260762f, 0.256872f, 0.130339f, -0.243751f, 0.189760f, - -0.288454f, 0.145363f, 0.338490f, 0.403898f, -0.022814f, -1.263598f, - -0.101315f, 0.860135f, 0.136511f, 0.028942f, 0.574047f, 2.656370f, - 0.037587f, -0.188690f, -0.125312f, 1.100435f, -1.080402f, 0.380905f, - 0.004635f, 0.097144f, -0.214309f, 0.085552f, -0.285066f, -0.705134f, - -0.054704f, -0.319951f, 5.486626f, 0.958158f, -1.380585f, 0.223340f, - -0.169167f, -0.170697f, -0.216748f, 0.324232f, 2.684204f, -0.008490f, - -0.211052f, -0.201190f, 0.123466f, -0.000234f, 0.579907f, 0.096938f, - -0.042745f, 0.201855f, 0.157195f, -0.261440f, 0.029699f, -0.046599f, - 1.618216f, -2.596280f, -0.377420f, -0.526725f, -0.493592f, -0.579615f, - 0.579699f, -0.100392f, 0.150694f, 0.061794f, 0.200425f, -0.062515f, - -0.179122f, 0.250112f, -0.344675f, -0.118359f, -0.095670f, 0.152311f, - 3.662276f, -0.154921f, -0.312991f, 0.972008f, -0.308596f, -0.190426f, - 0.133889f, -0.238673f, -0.094726f, 1.683835f, -0.215629f, -0.198890f, - -0.035278f, -0.367973f, -0.822435f, 0.240848f, -0.194656f, 0.034655f, - -0.079424f, 0.146670f, 0.026646f, -0.034507f, 0.059467f, -0.153109f, - -0.431033f, 2.552991f, -1.894091f, -0.180462f, -0.306839f, -0.025648f, - 1.026326f, -3.096230f, 1.346935f, 0.033633f, -0.181827f, 0.094376f, - 0.001696f, -0.379264f, -1.069503f, -0.140972f, -0.208769f, -0.195239f, - 0.281795f, -0.127251f, 0.180776f, 0.067763f, 
0.697124f, -1.040779f, - 0.111280f, 0.188351f, -0.340234f, -0.207790f, -0.720075f, -0.137409f, - -0.070310f, -0.032918f, -0.060787f, 0.131484f, -0.077845f, -0.258652f, - 0.056911f, -0.062034f, 0.007663f, -0.185100f, 1.340361f, 0.014096f, - -0.124602f, 0.194241f, 0.128383f, 0.360465f, 0.082979f, -0.050475f, - -0.519294f, 3.323262f, 0.067014f, 0.221203f, -0.085082f, -0.228606f, - -0.916668f, -0.022643f, -1.386737f, -0.131902f, -0.349952f, -0.032874f, - -0.189190f, -0.898790f, -0.102394f, -1.017387f, 2.214050f, 1.790253f, - -1.913561f, -0.043716f, -0.214924f, -0.194598f, -0.064723f, -1.671793f, - 2.251166f, -0.146007f, 0.138527f, -0.003134f, 0.103665f, 0.006928f, - -0.240253f, -0.227464f, 0.578437f, -0.214724f, 0.503085f, 0.158093f, - 0.033091f, 0.008061f, 4.815371f, 2.132264f, 0.281850f, -2.288560f, - -0.145012f, 1.296832f, -0.362401f, -0.403252f, 0.109873f, 0.185746f, - 0.244764f, 0.172367f, -0.185588f, 0.139801f, -0.178254f, 0.068629f, - 0.358488f, -0.153969f, -6.433524f, 0.225983f, -0.138123f, -0.095971f, - -0.036089f, -1.400083f, 0.265908f, 0.257787f, 0.181144f, -1.647228f, - -0.136289f, -0.074206f, 0.122988f, -0.088895f, -1.266717f, 0.006010f, - 0.536681f, 0.263061f, -0.032207f, -0.155136f, 0.086431f, 0.441950f, - -0.060755f, -0.280683f, -0.783475f, -2.567033f, 1.093221f, 0.117667f, - -0.000408f, 0.225719f, -2.199698f, 0.141447f, -1.459051f, 0.051315f, - 0.203228f, 0.354432f, -0.005775f, -0.028073f, -0.965817f, 0.231083f, - -0.666884f, 0.026283f, -0.317486f, 0.210754f, 0.123897f, 0.223827f, - 4.214405f, 1.457334f, -0.253945f, -1.306733f, -0.391235f, 0.451154f, - -1.553888f, -0.353429f, 0.069533f, 0.159278f, -0.173836f, -0.004952f, - -0.137033f, 0.127012f, 0.143600f, 0.051587f, -0.070549f, 0.066509f, - -5.776547f, 0.180021f, -0.189183f, -1.288504f, -0.233575f, -1.473873f, - 0.140940f, 0.144451f, -0.104534f, 2.089873f, -0.168168f, 0.110726f, - 0.132134f, -0.215223f, -1.682754f, 0.157757f, -0.146163f, 0.064882f, - 0.117313f, -0.038780f, -0.124720f, -0.501697f, 
0.092047f, -0.233992f, - 3.324976f, 0.516601f, 1.294202f, 0.119989f, 0.061055f, 0.043420f, - -2.750727f, -0.382812f, -0.648496f, -0.115353f, -0.334205f, 0.024354f, - -0.282998f, -0.282705f, 0.073798f, 0.169851f, 0.135651f, 0.182677f, - -0.040220f, 0.132462f, -0.303120f, -0.230113f, 6.165739f, -0.258596f, - 0.024127f, -1.388283f, -0.006042f, 0.572600f, 0.348411f, -0.387376f, - -0.075845f, 0.122319f, -0.029616f, 0.077873f, 0.154763f, 0.049073f, - 0.018597f, 0.102688f, -0.204165f, 0.020734f, -1.389133f, -0.032854f, - -0.147561f, 0.853944f, 0.132100f, -3.259659f, 0.243745f, 0.181529f, - -0.738414f, 1.509994f, 0.023470f, -0.005329f, 0.066115f, -1.345081f, - -1.455402f, -0.172023f, -0.194625f, 0.071885f, -0.201742f, -0.262402f, - 0.077601f, -0.048938f, 0.257993f, -0.504029f, -2.032415f, 1.158880f, - 0.448647f, -0.025633f, 0.117586f, -0.072275f, -0.673744f, -3.854342f, - -0.983843f, 0.047766f, -0.017193f, -0.215775f, -0.158743f, -0.232042f, - -0.509112f, 0.148812f, 0.130122f, 0.006486f, -0.099016f, 0.022514f, - -0.486850f, -0.059623f, 4.012731f, 0.025454f, 0.029059f, -0.783546f, - -0.295260f, 0.322521f, -0.473201f, -0.172100f, -0.100087f, -0.076516f, - -0.258367f, -0.112897f, 0.269364f, -0.065912f, 0.169022f, -0.178783f, - -0.095114f, 0.122089f, -2.790099f, -0.100431f, -0.087963f, -0.009431f, - -0.087819f, -2.774399f, -0.100757f, 0.013005f, -0.964533f, 3.236665f, - -0.354903f, -0.144169f, -0.166869f, -1.396513f, -0.931271f, -0.046261f, - -1.799262f, -0.365269f, 0.108611f, 0.037994f, 0.024747f, -1.073639f, - -0.203158f, -0.935006f, 1.880891f, 1.578385f, 0.726272f, -0.024546f, - -0.011626f, -0.151363f, -1.121716f, -1.787484f, 0.232806f, 0.075451f, - 0.182899f, 0.092215f, -0.207347f, -0.030111f, 0.054316f, 0.192481f, - 0.594639f, -0.247694f, 0.547471f, -0.032094f, -0.065000f, 0.007198f, - 1.605377f, -0.155945f, -0.066200f, -2.343716f, -1.016283f, -0.079321f, - 0.919365f, 0.599980f, 0.125545f, 0.265813f, 0.246884f, 0.095385f, - -0.260374f, -0.202916f, -0.042770f, 0.234967f, 
-0.233139f, -0.326994f, - -1.375256f, 0.121766f, 0.077433f, -1.103569f, 0.019497f, -1.029185f, - 0.253905f, 0.206569f, 0.187334f, -0.237089f, -0.294351f, 0.164137f, - 0.149696f, -0.749787f, -0.413433f, 0.976587f, 1.027976f, -0.285264f, - 0.209273f, -0.124762f, 0.050884f, 0.250764f, -0.082031f, -0.646520f, - 4.116680f, 0.437336f, 0.671684f, 0.129509f, -0.078462f, 0.014072f, - -0.678232f, 0.094831f, 1.125624f, 0.207070f, -0.154750f, -0.025780f, - -0.103030f, 0.118019f, -0.908186f, -0.263546f, -1.555324f, -0.236887f, - -0.217854f, -0.051790f, 0.017915f, 0.171001f, 1.355562f, 0.094603f, - -0.233929f, -1.282169f, -0.773183f, -0.161682f, -0.834565f, -0.286776f, - -0.298901f, 0.038162f, 0.251899f, 0.039612f, -0.022935f, -0.232308f, - -0.043855f, -0.192892f, -0.279009f, -0.182234f, -1.272808f, -0.070344f, - -0.092432f, -1.915946f, -0.134373f, -1.405496f, -0.067071f, -0.131922f, - 0.185269f, 1.465082f, 0.040240f, 0.112665f, 0.144329f, -0.286112f, - -0.617649f, 0.916177f, 0.221044f, -0.079867f, 0.170251f, -0.093638f, - -0.212620f, -0.305945f, -0.234356f, -0.482501f, 3.928472f, 1.241179f, - 0.355922f, -0.170848f, -0.189168f, 0.080225f, -1.357793f, 0.190890f, - 0.976800f, -0.068070f, -0.016295f, -0.088623f, -0.129560f, -0.212267f, - -0.071537f, -0.219501f, -0.655198f, -0.225188f, -0.116024f, 0.224174f, - -0.049715f, -0.178005f, 3.029985f, -1.141546f, 0.080066f, -1.932316f, - -0.641137f, -0.189564f, 0.935080f, 0.136119f, 0.015558f, -0.179331f, - 0.204571f, 0.020350f, 0.009362f, 0.108478f, 0.037076f, -0.049009f, - 0.081090f, -0.180202f, 1.455561f, -0.081559f, 0.059361f, 0.484971f, - 0.160923f, -2.170744f, -0.013204f, 0.126561f, -0.407122f, 1.223661f, - 0.044262f, 0.118044f, 0.058274f, -1.747100f, -0.171318f, 0.971374f, - 0.306995f, -0.103268f, -0.319443f, -0.333176f, -0.038608f, 0.119674f, - -0.106479f, -0.907933f, 1.121231f, 1.673840f, -0.421458f, -0.021146f, - -0.254838f, 0.097632f, 0.235109f, -2.901782f, 0.289518f, -0.355459f, - -0.068264f, -0.179121f, 0.068560f, -0.047570f, 
-0.522523f, -0.228963f, - -1.037158f, -0.163723f, 0.280563f, -0.000868f, -0.197220f, -0.239329f, - 1.985274f, -0.256181f, -0.064341f, -0.822417f, -0.465140f, -0.010942f, - -0.792024f, -0.114290f, 0.060969f, 0.104106f, -0.252123f, -0.150400f, - -0.133277f, 0.267147f, 0.274413f, 0.223744f, -0.180223f, -0.345415f, - -0.104883f, 0.119210f, -0.095041f, -0.301635f, 0.013175f, -2.128121f, - -0.147208f, -0.151509f, -0.692013f, 3.418555f, -0.016541f, 0.171511f, - 0.107159f, -1.516672f, 0.127408f, 0.687035f, -0.906486f, -0.145463f, - -0.169382f, -0.143906f, 0.125091f, -0.960645f, -0.180869f, -0.716908f, - 2.840951f, 1.904919f, -0.416268f, -0.425181f, -0.194697f, -0.075932f, - -0.950604f, -1.599800f, 0.943671f, -0.022744f, -0.270492f, 0.080843f, - -0.372916f, 0.047838f, -0.100300f, -0.026600f, 0.011733f, -0.226051f, - 0.172790f, -0.172982f, 0.041258f, -0.299379f, -}; - -static const float av1_ab_partition_nn_bias_16_layer1[LABEL_SIZE] = { - -0.053805f, -1.248639f, 0.520965f, -0.904962f, -0.126425f, -0.118798f, - 0.748430f, 0.203096f, 0.059317f, 0.418219f, 0.841294f, 0.402693f, - -0.658522f, 0.723479f, 0.544264f, 1.035225f, -}; - -static const NN_CONFIG av1_ab_partition_nnconfig_16 = { - FEATURE_SIZE, // num_inputs - LABEL_SIZE, // num_outputs - 1, // num_hidden_layers - { - 64, // num_hidden_nodes - }, - { - av1_ab_partition_nn_weights_16_layer0, - av1_ab_partition_nn_weights_16_layer1, - }, - { - av1_ab_partition_nn_bias_16_layer0, - av1_ab_partition_nn_bias_16_layer1, - }, -}; - -#undef FEATURE_SIZE -#undef LABEL_SIZE - -#define FEATURE_SIZE 18 -#define LABEL_SIZE 4 - -static const float av1_4_partition_nn_weights_16_layer0[FEATURE_SIZE * 24] = { - -2.032866f, 0.056691f, 0.495960f, 0.778785f, 0.548153f, -0.806942f, - 0.481155f, 0.282298f, 0.584980f, 0.504688f, 0.209648f, 0.234616f, - 0.213484f, 0.221969f, 0.205862f, 0.235054f, 0.317863f, 0.257139f, - 0.529478f, 0.098122f, -0.657532f, 0.036296f, 0.327728f, 1.323180f, - -0.813082f, 0.160216f, -0.702030f, 0.722733f, 
-0.270576f, -0.347416f, - -0.264700f, -0.254248f, 0.159820f, 0.087995f, -0.184163f, 0.117357f, - 0.074194f, -0.667369f, 0.498246f, 0.420506f, 0.072409f, -0.121581f, - 0.315788f, 0.000525f, 0.414986f, 0.678166f, -0.011230f, 0.188131f, - -0.227749f, 0.009564f, 0.108672f, 0.106923f, -0.080695f, -0.279382f, - -0.061339f, -0.297835f, -0.134707f, 0.145865f, -0.009655f, -0.000842f, - -0.047436f, -0.159149f, -0.320353f, -0.089646f, -0.344765f, 0.313416f, - -0.143413f, 0.279668f, 0.000885f, -0.022380f, -0.140194f, -0.310473f, - 0.252699f, 0.066204f, 0.477568f, 0.994609f, -0.276000f, 1.213182f, - 0.277028f, -0.411570f, -0.211559f, 0.377815f, 0.121488f, -0.100559f, - -0.317082f, -0.251039f, -0.335181f, -0.154114f, -0.052726f, -0.332558f, - -0.143196f, -0.334035f, 0.162305f, 0.142279f, -0.001210f, -0.135252f, - -0.033562f, 0.204307f, -0.039757f, -0.394174f, 0.126617f, -0.128648f, - -0.410979f, 0.107641f, -0.117573f, -0.326512f, 0.235166f, 0.084959f, - 0.290063f, -0.005838f, 0.459894f, 1.023709f, -0.196145f, 1.100137f, - -0.319815f, -0.308526f, -0.443389f, -0.272769f, -0.035259f, -0.026932f, - -0.029743f, 0.125113f, -0.131024f, -0.321458f, -0.143996f, 0.008714f, - -0.101234f, 0.079706f, -1.128615f, -0.467381f, 0.220563f, -0.409900f, - -0.435353f, 0.759499f, -0.465799f, -0.394309f, 0.176282f, -0.086275f, - -0.161225f, -0.354814f, 0.562871f, 0.418253f, 0.414361f, 0.445480f, - -0.995903f, -0.086632f, -0.230645f, 0.354656f, -0.317576f, 0.079926f, - 0.424369f, 0.997232f, -0.304388f, 1.071667f, -0.023540f, 0.029677f, - 0.108564f, 0.183581f, -0.201395f, -0.054854f, -0.193039f, -0.049899f, - -0.271949f, -0.358483f, 0.304930f, 0.023823f, -0.009319f, -0.214247f, - 0.100712f, -0.050162f, 0.327103f, -0.212999f, -0.030496f, 0.316380f, - -0.439589f, -0.249959f, 0.229777f, -0.353664f, -0.384559f, 0.114236f, - 0.023119f, 0.007927f, 0.618368f, 0.957759f, -0.019780f, -1.002389f, - 0.564277f, -0.839531f, 1.040445f, 0.054340f, 0.031908f, -0.032893f, - -0.019170f, -0.042011f, 0.568928f, 0.362567f, 
-0.559999f, -0.605344f, - -0.586146f, -0.290778f, 0.195943f, -0.109580f, -0.088898f, -0.113054f, - 0.293282f, 0.429019f, 0.306136f, 0.863025f, 0.021234f, 0.125770f, - -0.097108f, -0.072659f, -0.137053f, -0.191631f, 0.106281f, 0.064151f, - 0.029883f, 0.076287f, 0.757543f, 0.276713f, -2.529775f, -0.351727f, - -1.832316f, 0.544780f, -0.944529f, 0.509705f, -0.010236f, -0.016181f, - 0.021520f, 0.086417f, 0.041312f, 0.296853f, -0.372378f, 0.354446f, - -1.366762f, 0.048875f, 0.464918f, -0.007450f, 0.750013f, -0.360261f, - 0.518532f, 0.753776f, 0.641448f, 0.710746f, 0.250866f, 0.257063f, - 0.283421f, 0.253585f, 0.170303f, 0.210426f, 0.208842f, 0.158000f, - -0.033144f, 0.130748f, 0.907147f, 0.409248f, -0.854301f, -0.981307f, - 0.294427f, -0.507137f, 1.079967f, 0.203203f, 0.383890f, 0.368278f, - 0.305122f, 0.449288f, -0.044507f, -0.547263f, -0.298245f, -0.497834f, - 0.007016f, -0.101982f, -0.073488f, -0.096111f, -0.479418f, -0.045497f, - 0.033502f, -0.018578f, -0.231531f, 0.177949f, 0.099564f, -0.010233f, - -0.333055f, -0.078586f, -0.417867f, 0.171271f, 0.013662f, -0.143599f, - -0.117296f, 0.135382f, 0.048321f, 0.000924f, -0.055024f, -0.405595f, - -0.068260f, -0.271011f, -0.436425f, 0.206751f, -0.899890f, 0.605510f, - 0.535649f, -0.238919f, -0.037619f, -0.213734f, -0.391360f, -0.132344f, - 0.004660f, 0.176644f, -1.008475f, -0.038895f, 0.155429f, -0.095229f, - -0.680124f, -0.258063f, -0.261901f, 0.110380f, -0.337649f, -0.505870f, - -1.428536f, 0.610629f, 0.254905f, 0.045098f, 0.044109f, 0.172329f, - 0.060001f, -0.234009f, -0.184855f, -0.153028f, -0.140897f, -0.152006f, - -0.312134f, 0.081261f, 0.160166f, 0.112690f, 0.266081f, 0.030175f, - -0.242746f, 0.000754f, -0.341811f, -0.149774f, -0.017484f, -0.301342f, - -0.121466f, 0.067300f, 0.342176f, 0.474538f, 0.085441f, -0.263935f, - 0.479235f, -0.003713f, -0.784840f, 0.119480f, 0.456632f, -0.640082f, - -0.080575f, -0.744403f, 0.259970f, 0.034667f, -0.274641f, -0.257594f, - -1.121124f, -0.003745f, -0.420693f, 0.300441f, 
-0.100976f, -1.049016f, - 0.201960f, 0.113054f, 0.187010f, 1.237427f, 0.054803f, -0.028673f, - 0.003596f, -0.034724f, 0.117246f, 0.190977f, 0.278915f, 0.224307f, - 0.017852f, -0.336233f, -0.372311f, -0.182284f, -0.143510f, 0.331466f, - 0.045698f, -0.301095f, 0.184447f, 0.348240f, -0.017021f, -0.145064f, - -0.000221f, -0.382256f, -0.302683f, -0.083927f, -0.008070f, 0.217907f, - 0.647597f, -0.050490f, -0.572736f, -0.985748f, -0.289943f, 0.041391f, - -0.795464f, -0.186680f, -0.354062f, -0.617400f, -0.282783f, -0.170450f, - -0.197197f, -0.146496f, -0.173692f, -0.106277f, -0.071004f, -0.124405f, - -0.971412f, 0.038542f, 0.705204f, 0.887113f, 0.150430f, -0.243676f, - 0.638410f, 0.320953f, 0.776676f, 0.527584f, 0.070389f, 0.051554f, - 0.177519f, 0.140451f, 0.128892f, 0.087771f, 0.197660f, 0.194764f, -}; - -static const float av1_4_partition_nn_bias_16_layer0[24] = { - 0.614063f, -0.384872f, 0.084884f, -0.023980f, -0.378765f, -0.082312f, - -0.458271f, 0.189578f, -0.046169f, -0.073308f, -0.372322f, 0.162793f, - 0.148803f, 0.829214f, -0.221162f, -0.111157f, -0.017484f, -0.280596f, - -0.031905f, -0.143459f, 0.078823f, -0.021940f, 0.026834f, 0.257472f, -}; - -static const float av1_4_partition_nn_weights_16_layer1[24 * LABEL_SIZE] = { - -0.985391f, 0.587616f, 0.740683f, 0.192066f, 0.447080f, -0.016585f, - 0.680449f, 0.028983f, 0.643111f, 0.234338f, 0.107148f, 0.328456f, - -0.216394f, 1.106838f, -0.179062f, -0.129108f, -0.121655f, -0.151340f, - -0.306017f, -0.350989f, 0.859284f, -0.372831f, -0.954419f, 0.250495f, - 1.046732f, 0.287923f, -0.421088f, 0.326613f, -0.314396f, -0.084757f, - -0.474228f, 0.687999f, 0.052334f, 0.441708f, -0.630698f, -0.350348f, - -0.602067f, -0.434161f, -0.489824f, -0.313193f, 0.315568f, 0.603119f, - 0.120245f, 0.182920f, -1.117797f, -0.239594f, -0.296296f, -0.718093f, - 0.489497f, -0.527019f, 0.102453f, 0.426731f, 0.034606f, 0.311461f, - -0.012723f, -0.229877f, -0.284290f, 0.383227f, 0.065696f, -0.222400f, - 1.279248f, -0.862190f, 0.629766f, 
-0.250011f, -0.325060f, -0.360115f, - -0.159540f, -0.291856f, -0.038348f, 0.224639f, 0.600934f, 0.030205f, - 1.337615f, -0.286409f, -0.473710f, -0.418995f, -1.035249f, 0.004359f, - -0.481860f, 0.563625f, -0.154709f, -0.101198f, -0.758796f, -0.507616f, - -0.095253f, -0.711135f, 0.207759f, 0.076313f, -0.056087f, -0.162719f, - -0.232918f, -0.128402f, -0.444620f, -0.447344f, 1.126012f, -1.504446f, -}; - -static const float av1_4_partition_nn_bias_16_layer1[LABEL_SIZE] = { - -0.462133f, - 0.465060f, - 0.062211f, - 0.401786f, -}; - -static const NN_CONFIG av1_4_partition_nnconfig_16 = { - FEATURE_SIZE, // num_inputs - LABEL_SIZE, // num_outputs - 1, // num_hidden_layers - { - 24, // num_hidden_nodes - }, - { - av1_4_partition_nn_weights_16_layer0, - av1_4_partition_nn_weights_16_layer1, - }, - { - av1_4_partition_nn_bias_16_layer0, - av1_4_partition_nn_bias_16_layer1, - }, -}; - -static const float av1_4_partition_nn_weights_32_layer0[FEATURE_SIZE * 32] = { - -0.219494f, -0.428273f, 0.471006f, 0.448210f, -0.152935f, 0.440435f, - 0.922857f, -0.074436f, 1.002195f, 0.414176f, -0.327202f, -0.380066f, - -0.212346f, 0.061868f, -0.056620f, 0.594134f, 0.617995f, 0.308358f, - 0.232484f, 0.129849f, 1.483593f, -0.071460f, 1.984515f, 1.116422f, - -1.141762f, -0.306220f, 0.089075f, -0.271845f, 0.187524f, 0.050396f, - -0.061025f, 0.030809f, 0.172799f, -0.458151f, -0.318357f, 0.122052f, - -0.414329f, 0.089366f, 0.118898f, -0.376213f, -0.206151f, -0.519946f, - -0.463252f, -0.206694f, -0.254383f, -0.379487f, 0.093059f, -0.245280f, - -0.205044f, -0.280060f, -0.171229f, -0.045389f, -0.179481f, -0.306245f, - -0.500856f, 0.003388f, -0.527397f, -0.449330f, -0.174272f, 0.123769f, - 0.023005f, 0.157273f, 0.073400f, 0.019099f, -0.113848f, -0.098601f, - -0.290946f, -0.046770f, -0.314592f, -0.179914f, -0.391411f, -0.235631f, - -1.282604f, 0.048505f, -0.746382f, 0.093740f, -0.706583f, -0.085729f, - 0.947382f, -0.002961f, 1.175362f, 1.007309f, 0.141638f, -0.037608f, - -0.118807f, -0.021474f, 
-0.146763f, 0.069363f, -0.074372f, -0.215713f, - -0.004134f, -0.114110f, -0.330438f, -0.031136f, 0.111821f, -0.534598f, - -0.357759f, -0.455950f, 0.139469f, 0.036582f, -0.384743f, -0.168828f, - -0.239250f, 0.003520f, -0.049003f, 0.075702f, -0.025809f, -0.225972f, - -0.228905f, -0.412489f, 0.060570f, -0.328819f, -0.206446f, -0.080231f, - -0.372008f, -0.218118f, -0.011954f, 0.024155f, 0.156014f, 0.020679f, - 0.194398f, -0.283491f, -0.024463f, -0.275099f, 0.028031f, 0.026340f, - -0.254668f, 0.103637f, 2.178693f, 0.552284f, 0.109366f, -0.474806f, - -0.379286f, -0.026315f, 2.487924f, -0.089466f, 0.206428f, 0.114578f, - 0.152248f, 0.184050f, -0.631948f, -0.014793f, -0.283782f, -0.830353f, - 0.009343f, -0.021029f, -0.060534f, -0.025164f, 1.841311f, 1.842748f, - -1.979708f, 0.450985f, -1.606357f, -0.785454f, -0.212679f, -0.344342f, - 0.198991f, -0.258070f, 0.055974f, 0.224069f, 0.453051f, 0.408053f, - 0.027873f, -0.180538f, 0.056609f, 0.207654f, 0.104086f, -0.194426f, - -0.359789f, -0.381143f, -0.331212f, -0.203973f, -0.324313f, -0.160825f, - -0.160439f, -0.044856f, -0.346647f, 0.044859f, 0.231398f, -0.023643f, - -0.140316f, -0.260177f, 0.206965f, -0.425386f, -0.420268f, -0.409748f, - 0.006971f, 0.066186f, -0.034950f, -0.345518f, 0.018633f, -0.122489f, - -0.038506f, -0.330942f, 0.161236f, -0.314119f, -0.050202f, -0.179597f, - 0.731897f, -0.184481f, 0.153598f, -0.539501f, -0.301493f, -0.184967f, - -0.883754f, -0.586959f, -0.136292f, -1.772065f, -0.196276f, -0.053272f, - -0.101083f, -0.064142f, 0.161190f, 0.430826f, 0.355647f, 0.138266f, - 0.051114f, -0.028893f, -0.477673f, -0.238663f, -0.354117f, -0.056747f, - -0.334273f, -0.497688f, -0.486004f, -0.092033f, -0.241304f, -0.373250f, - 0.120193f, 0.011360f, -0.010475f, -0.092739f, -0.159650f, -0.033129f, - -0.259893f, -0.073217f, 0.200128f, 0.103407f, -0.229233f, 0.128831f, - -0.063450f, -0.241732f, -0.408428f, -0.342239f, -0.264326f, -0.105403f, - -0.442879f, -0.310456f, -0.112881f, 0.263696f, -0.205014f, -0.497936f, - 
-0.261734f, -0.382312f, -0.426807f, -0.021995f, -0.152794f, -0.301494f, - 0.117232f, -0.577809f, 0.154596f, -0.409522f, -0.413113f, -0.359199f, - 0.307294f, -0.008746f, -0.310522f, 0.347620f, -0.384845f, -0.451398f, - -0.226199f, 0.054154f, -0.167608f, 0.046836f, -0.013285f, -0.408119f, - -0.177973f, -0.248293f, -0.465830f, 0.035827f, -0.222208f, -0.221717f, - 0.066392f, -0.349769f, -0.428029f, -0.516692f, 0.022398f, -0.251682f, - 0.134746f, 0.011167f, -2.078787f, 0.173592f, -1.948348f, 0.330060f, - 1.993785f, -0.052859f, -0.004795f, -3.703177f, 0.013450f, -0.011687f, - 0.073079f, 0.034803f, 0.025515f, 0.005994f, 0.101731f, 0.074303f, - -0.109962f, -0.270825f, -0.068273f, -0.163268f, -0.252826f, 0.137190f, - 0.007667f, -0.358453f, 0.027412f, 0.033492f, 0.021197f, -0.049991f, - 0.104468f, -0.012157f, -0.056252f, -0.380756f, -0.338483f, 0.233235f, - -0.048631f, -0.441209f, -0.158482f, -0.148108f, -0.263453f, 0.138847f, - -0.304073f, -0.336312f, -0.017941f, -0.135563f, 0.075137f, -0.246475f, - -0.229144f, -0.087744f, -0.346909f, 0.172611f, 0.004377f, -0.009386f, - -0.023104f, 0.008000f, -0.029390f, -0.317842f, 0.549674f, -0.195337f, - -0.863979f, 0.160889f, -0.269014f, -0.442104f, -1.799191f, 1.396533f, - -0.112837f, 0.881303f, 0.000764f, -0.035415f, -0.141877f, 0.184831f, - -0.363566f, -0.178569f, 0.254134f, -0.326893f, 0.127325f, 0.310620f, - -0.384621f, 0.146058f, -0.287682f, -0.373447f, 0.026930f, 0.251650f, - 0.053817f, 0.227509f, 0.121396f, 0.396514f, -0.278381f, -0.038969f, - -1.538756f, -0.002856f, -0.892900f, 0.363426f, -1.257922f, 0.743795f, - 0.941177f, 0.219345f, 0.684189f, 1.396858f, 0.026299f, -0.093433f, - -0.066182f, 0.057868f, -0.089278f, -0.159680f, -0.262035f, -0.236656f, - 0.005349f, -0.031314f, 0.027917f, -0.182113f, -0.212086f, -0.160774f, - 0.051468f, 0.036787f, 0.183881f, -0.288205f, -0.349691f, 0.162511f, - 0.117878f, -0.294534f, -0.365037f, -0.246313f, 0.073977f, -0.072378f, - -0.173579f, -0.584560f, 0.547194f, 0.259853f, -0.405287f, 
-0.421146f, - 0.165788f, -0.146964f, 0.257415f, 0.772394f, -0.475302f, -0.310906f, - 0.058723f, 0.276833f, 0.586842f, 0.248998f, -0.061135f, 0.255779f, - 0.152158f, -0.024781f, 2.821834f, 1.365141f, 0.914744f, 0.165752f, - -1.048304f, -0.333891f, 1.804087f, -0.437028f, -0.120211f, -0.020443f, - 0.040077f, 0.258600f, -0.598893f, -0.494579f, -0.281054f, -0.517041f, - 0.005258f, 0.053986f, 0.322755f, 0.429495f, -1.992364f, -0.717192f, - -1.774802f, 2.047362f, -0.016194f, 0.312606f, 0.019331f, 0.060950f, - 0.116428f, 0.168458f, -0.307001f, -0.420734f, 0.475843f, 0.425346f, - -0.107119f, 0.049892f, -1.168619f, 0.010878f, 0.354872f, 0.902717f, - -0.391407f, 0.332772f, -1.335037f, -0.447100f, 0.481719f, -0.101069f, - -1.806565f, 0.925280f, 0.346999f, 0.093809f, 0.006275f, 0.270814f, - -0.691123f, 0.230748f, 0.137033f, 0.068228f, 1.555975f, -0.271637f, - -0.370403f, 0.236131f, 0.367464f, -0.136562f, 0.428838f, 0.181750f, - 0.338762f, 0.292449f, -0.748204f, -0.922731f, -0.959445f, -0.806418f, - -0.140501f, 0.070525f, 1.248748f, 0.637990f, -1.307246f, -0.514055f, - 0.393858f, -1.858727f, 0.713591f, -0.141044f, 0.080723f, 0.120220f, - -0.031175f, 0.224488f, 0.753818f, -0.833351f, -1.099132f, 0.651100f, - -0.135061f, -0.043820f, 0.026983f, -0.059259f, 0.001345f, -0.281775f, - 0.006958f, 0.046103f, -0.246539f, 0.057630f, -0.360778f, -0.160681f, - -0.414870f, -0.301979f, 0.000683f, 0.132957f, -0.477609f, 0.106110f, - -0.637769f, -0.078374f, -0.229494f, 0.583108f, -0.822973f, -0.107540f, - 1.063426f, -0.268346f, 1.105787f, 2.587550f, -0.020314f, -0.002161f, - -0.063836f, -0.099990f, -0.103975f, -0.114078f, -0.094199f, -0.065181f, - -0.019870f, -0.018920f, -0.219732f, 0.035608f, -1.789450f, 0.483032f, - -0.464729f, 1.563277f, -1.054195f, 0.359991f, 0.065204f, 0.135623f, - 0.158380f, -0.103815f, -1.398726f, -1.436666f, -0.356311f, 0.507752f, -}; - -static const float av1_4_partition_nn_bias_32_layer0[32] = { - 0.421645f, -0.620548f, -0.187819f, -0.189414f, -0.204975f, -0.189600f, - 
-0.174917f, -0.651928f, -0.799655f, -0.086105f, -0.163449f, -0.089212f, - -0.214495f, -0.108500f, -0.065777f, -0.127704f, 1.544948f, -0.032831f, - -0.165621f, 0.145844f, -0.032104f, -0.453246f, -0.113444f, 0.321589f, - -0.862375f, -0.108826f, -0.486259f, 0.685325f, 0.072569f, -0.187961f, - 0.109579f, -0.082685f, -}; - -static const float av1_4_partition_nn_weights_32_layer1[32 * LABEL_SIZE] = { - 0.255012f, 0.658860f, 0.216907f, 0.165947f, 0.241182f, 0.340854f, - 0.409445f, 0.165220f, 0.553373f, -0.242385f, -0.209571f, 0.255515f, - 0.222500f, 0.037032f, 0.238590f, 0.061624f, -2.038693f, 0.264167f, - -0.230144f, 0.129952f, -0.027979f, 0.847761f, 0.438922f, 0.462323f, - 0.555345f, 0.030689f, 0.336357f, -0.357326f, -0.113137f, 0.272631f, - 0.421022f, 0.367776f, -0.197094f, 0.157117f, -0.015008f, -0.056123f, - -0.283913f, 0.186417f, 0.178561f, -0.763041f, 0.602038f, 0.341092f, - 0.320453f, -0.312776f, -0.371240f, -0.356279f, 0.220117f, -0.131871f, - 1.517429f, 0.162223f, -0.255069f, 0.451861f, 0.045071f, -0.223257f, - 0.003257f, 0.015734f, -0.630447f, -0.672588f, 0.670164f, 0.571031f, - -0.657948f, 0.034506f, -0.249076f, 0.790293f, 0.066491f, -0.131245f, - 0.355173f, 0.564622f, 0.374048f, 0.033974f, 0.253970f, 0.495498f, - -0.556321f, -0.104651f, 0.276947f, 0.057148f, -0.039126f, -0.170050f, - -0.141542f, 0.158541f, 0.582763f, -0.100992f, 0.096705f, -0.209029f, - 0.008449f, 0.255865f, 0.103565f, 0.317719f, 0.479499f, 0.599126f, - -0.065613f, -0.268614f, 0.508736f, 0.180813f, -0.815868f, 0.051238f, - 0.001223f, -0.305423f, -0.270079f, 0.036180f, 0.304342f, 0.202634f, - 0.218348f, -0.304304f, -0.438297f, 0.241123f, 0.200230f, 0.151804f, - 0.051944f, 0.160422f, -0.262981f, -0.417412f, 1.845729f, -0.086183f, - 0.403517f, 0.059667f, 0.564543f, -0.081752f, 0.114907f, -0.284489f, - -0.673943f, 0.056965f, 0.362221f, 0.403224f, -0.000233f, -0.209552f, - -0.800926f, -0.134132f, -}; - -static const float av1_4_partition_nn_bias_32_layer1[LABEL_SIZE] = { - -0.019518f, - 0.198546f, 
- 0.339015f, - -0.261961f, -}; - -static const NN_CONFIG av1_4_partition_nnconfig_32 = { - FEATURE_SIZE, // num_inputs - LABEL_SIZE, // num_outputs - 1, // num_hidden_layers - { - 32, // num_hidden_nodes - }, - { - av1_4_partition_nn_weights_32_layer0, - av1_4_partition_nn_weights_32_layer1, - }, - { - av1_4_partition_nn_bias_32_layer0, - av1_4_partition_nn_bias_32_layer1, - }, -}; - -static const float av1_4_partition_nn_weights_64_layer0[FEATURE_SIZE * 24] = { - -0.152649f, 0.074509f, 1.000136f, 0.601661f, -1.416694f, -1.932396f, - -1.163850f, 0.640931f, -0.888625f, -0.345711f, 0.161799f, 0.103165f, - 0.147513f, 0.089956f, 0.204329f, 0.196922f, 0.014927f, 0.283714f, - -0.110422f, 0.062005f, -0.531870f, -0.075287f, -0.448349f, -0.218881f, - -0.005592f, -0.130490f, -0.015779f, 0.093521f, -0.158487f, 0.072241f, - 0.066879f, -0.418566f, -0.206281f, 0.025634f, 0.048334f, -0.534750f, - 0.302081f, 0.028707f, -1.543248f, 0.103799f, -1.214052f, 0.395870f, - 0.394754f, -0.272170f, -0.702953f, -4.057464f, -0.033497f, -0.042142f, - 0.014742f, 0.065263f, 0.000879f, -0.019768f, 0.101275f, 0.163059f, - -0.371392f, -0.283484f, 0.241915f, 0.012684f, -0.210101f, -0.166534f, - -0.024894f, 0.274696f, 0.098993f, 0.104086f, 0.055044f, -0.289378f, - 0.146571f, -0.147441f, 0.004056f, 0.112244f, -0.416162f, -0.033176f, - -0.214836f, -0.213787f, 0.023197f, -0.339043f, 0.301109f, -0.408551f, - 0.284922f, -0.344418f, -0.039255f, 0.158748f, -0.344169f, 0.078286f, - -0.043957f, -0.302162f, -0.310826f, 0.063425f, 0.198166f, -0.285324f, - -0.108252f, 0.038992f, -1.053110f, -1.663290f, -0.417185f, 1.504443f, - 0.643206f, -0.850240f, 0.889641f, -0.733214f, 0.147302f, 0.060291f, - -0.052954f, 0.167453f, 0.111870f, 0.085471f, 0.035107f, 0.064361f, - 0.176053f, 0.184373f, 0.676576f, 0.066164f, 1.455569f, 0.925111f, - -0.640845f, 0.803795f, -0.653782f, -0.201038f, 0.060033f, 0.016964f, - -0.047590f, 0.045908f, 0.354162f, 0.014812f, 0.156978f, 0.058792f, - -0.238119f, 0.002450f, -0.094388f, 
-0.155229f, 0.194858f, -0.355429f, - -0.187098f, -0.119264f, -0.088694f, -0.102845f, 0.184905f, -0.425339f, - -0.157808f, -0.104599f, -0.393248f, -0.379842f, 0.027741f, -0.185816f, - -0.317294f, 0.002453f, -0.498241f, -0.204302f, -0.079093f, 0.020646f, - -0.412850f, -0.426039f, -0.177050f, -0.419304f, -0.064478f, -0.191802f, - -0.146812f, 0.171111f, 0.090261f, -0.367033f, -0.299051f, -0.322132f, - 0.428192f, -0.252613f, 0.488498f, -0.559682f, 0.486720f, -0.511084f, - 0.992506f, 0.346765f, -0.118697f, -0.065127f, -0.376612f, -0.345137f, - -0.426517f, -0.516836f, 0.307083f, 0.609362f, 0.369555f, 0.093775f, - -0.375664f, -0.221595f, -0.025465f, 0.134374f, -0.387031f, 0.096236f, - 0.337465f, -0.124029f, -0.157340f, -0.368790f, -0.104490f, -0.279507f, - -0.247705f, 0.146559f, -0.236206f, -0.036073f, 0.064206f, -0.330919f, - 0.516591f, -0.013492f, 1.269568f, 1.182530f, -0.455390f, -1.328091f, - -0.200950f, -0.380513f, -0.195532f, -0.341479f, 0.016064f, 0.021176f, - 0.169119f, 0.103707f, -0.174504f, -0.462719f, -0.079445f, -0.247128f, - 0.459111f, 0.036129f, 0.769570f, -0.080405f, 1.667107f, 0.355567f, - -2.433896f, 0.627572f, -0.600090f, -0.651872f, -0.059769f, -0.041945f, - -0.009933f, 0.014864f, -0.049378f, -0.041561f, 0.075180f, 0.138307f, - 0.122366f, -0.160756f, 0.215327f, 0.013572f, 0.198194f, -0.762650f, - 0.054466f, 1.110332f, 1.692853f, 0.658654f, -0.409549f, 0.506085f, - 0.330962f, -0.223008f, 0.007448f, -0.289062f, -0.476231f, -0.228359f, - 0.013977f, -0.000609f, -0.673604f, 0.275996f, 0.405291f, 1.693561f, - -1.079768f, 1.122516f, -0.203227f, 0.099265f, -0.165207f, -0.323899f, - -0.269973f, -0.080122f, 0.127700f, 0.190201f, 0.219527f, 0.306194f, - 0.026049f, -0.003779f, 1.107357f, 1.720315f, 1.017908f, 0.078664f, - -1.599813f, -0.482636f, -0.117450f, 0.122249f, 0.030220f, 0.039794f, - 0.176350f, 0.129715f, -0.305755f, -0.274044f, -0.299640f, -0.187335f, - -0.073616f, -0.564507f, -0.127758f, 0.044855f, -0.191090f, 0.039095f, - 0.115378f, 0.969352f, -0.088360f, 
0.301443f, 0.065726f, -0.019740f, - -0.102350f, -0.084913f, -0.194615f, 0.118582f, 0.920789f, -0.171615f, - -1.436553f, -0.026419f, -0.730864f, 0.615697f, -0.795079f, 0.119701f, - 0.601782f, 0.792902f, 0.184920f, 1.635090f, -0.085860f, -0.033187f, - -0.166883f, 0.008487f, -0.128300f, -0.089923f, -0.108781f, -0.133719f, - -0.011988f, -0.239816f, -0.092563f, -0.238471f, -0.339722f, 0.177432f, - -0.063101f, -0.121002f, 0.058072f, -0.031166f, 0.086413f, -0.016203f, - -0.305075f, -0.005420f, -0.168796f, 0.148745f, -0.116737f, -0.050222f, - -0.287952f, -0.290982f, -0.090449f, 0.076098f, -0.345632f, -0.061309f, - 0.142218f, 0.035692f, 0.304517f, -0.228031f, 0.119608f, -0.120350f, - 0.163404f, -0.105605f, -0.305462f, -0.176657f, 0.210070f, -0.227600f, - -0.081965f, -0.464027f, -0.053782f, -0.018367f, 0.119159f, 0.017162f, - -0.069792f, 0.305768f, -0.421095f, 0.187740f, -0.032059f, 0.575115f, - -0.064283f, -0.091828f, 0.772648f, -0.393189f, -0.297098f, 0.141420f, - 0.826389f, -0.071586f, -0.893968f, -0.346793f, -1.151655f, 0.039393f, - 1.546000f, -0.094029f, -0.005786f, -0.195764f, -0.169724f, -0.133167f, - -0.129312f, -0.418860f, -0.026553f, -0.053667f, -0.091976f, -0.106275f, - -0.492625f, 0.025350f, -0.332075f, -0.475638f, -0.076667f, -0.065779f, - 0.108957f, 0.246298f, -0.289007f, -0.442552f, -0.206692f, -0.257453f, - 0.073806f, -0.458606f, -0.410390f, -0.312674f, -0.144813f, 0.170128f, - 0.018810f, -0.098241f, 1.027369f, 0.479328f, 1.129707f, 0.484813f, - -0.085207f, 0.621873f, -0.520981f, 0.236175f, 0.273487f, 0.061426f, - 0.306085f, 0.161487f, 0.220991f, 0.223783f, -0.091826f, 0.391031f, -}; - -static const float av1_4_partition_nn_bias_64_layer0[24] = { - 0.580225f, -0.191304f, 1.091767f, -0.134522f, -0.089361f, 0.398750f, - -0.882708f, -0.213102f, -0.119981f, 0.378296f, -0.075719f, 0.426598f, - -2.015505f, 0.202534f, -1.044792f, -0.841519f, 0.266421f, -0.047115f, - -0.131147f, -0.075066f, -0.009441f, 0.853007f, -0.175606f, -0.868306f, -}; - -static const float 
av1_4_partition_nn_weights_64_layer1[24 * LABEL_SIZE] = { - -0.851937f, -0.211148f, -2.289513f, -0.275071f, 0.251340f, -0.340847f, - 0.498032f, 0.308652f, -0.051574f, 0.323146f, -0.097547f, -0.040269f, - 1.909655f, 0.098348f, 0.588136f, 0.568112f, 0.313297f, 0.920848f, - -0.014486f, 0.386014f, 0.029199f, -0.537330f, -0.021502f, 0.349073f, - -0.524715f, -0.351848f, 1.565454f, -0.297148f, 0.020177f, 0.648369f, - 0.027321f, -0.096052f, -0.363163f, -0.132642f, 0.024292f, -0.734176f, - -0.782700f, 0.408299f, 0.476945f, -0.489512f, -0.728318f, -0.632042f, - 0.405417f, 0.184086f, -0.400730f, 0.359032f, 0.019710f, -0.217409f, - 0.519159f, -0.136316f, 0.993592f, -0.147128f, 0.097495f, 0.426189f, - -0.295233f, 0.278799f, 0.080667f, -0.025052f, -0.307757f, 0.418716f, - -0.853388f, -0.374878f, -0.322725f, 0.696335f, -0.380649f, -0.160356f, - -0.140060f, 0.502455f, 0.656728f, -0.095023f, -0.184198f, -0.347069f, - 0.456372f, -0.029754f, 0.907923f, 0.265710f, -0.065505f, 0.226763f, - -0.277798f, 0.413292f, -0.593899f, -0.060740f, -0.313358f, -0.249944f, - -0.627329f, -0.327151f, -0.853788f, -1.163807f, -0.388944f, -0.228788f, - -0.057382f, 0.334741f, -0.283083f, 0.368280f, -0.407197f, -0.441849f, -}; - -static const float av1_4_partition_nn_bias_64_layer1[LABEL_SIZE] = { - -0.478735f, - 0.292948f, - 0.293172f, - 0.040013f, -}; - -static const NN_CONFIG av1_4_partition_nnconfig_64 = { - FEATURE_SIZE, // num_inputs - LABEL_SIZE, // num_outputs - 1, // num_hidden_layers - { - 24, // num_hidden_nodes - }, - { - av1_4_partition_nn_weights_64_layer0, - av1_4_partition_nn_weights_64_layer1, - }, - { - av1_4_partition_nn_bias_64_layer0, - av1_4_partition_nn_bias_64_layer1, - }, -}; - -#undef FEATURE_SIZE -#undef LABEL_SIZE - -#define FEATURE_SIZE 4 -static const float - av1_partition_breakout_nn_weights_128_layer0[FEATURE_SIZE * 32] = { - -0.331785f, 0.068675f, -0.323814f, 0.033714f, -0.237835f, 0.166316f, - -0.498766f, -0.545634f, -0.266173f, -0.476957f, -0.120409f, -0.021042f, - 
0.124056f, -0.278750f, -0.110120f, -0.372812f, 4.547939f, 0.097618f, - -0.002710f, -0.064169f, -1.841173f, -0.403833f, 0.005536f, 0.067188f, - -0.434935f, -0.227421f, -0.000011f, -0.139961f, -0.174056f, -0.652384f, - -0.000015f, -0.262847f, -3.319706f, -0.947693f, 0.002981f, 0.016717f, - -10.408850f, -0.014568f, -0.000018f, 0.019084f, 1.523383f, 0.074525f, - -0.002076f, -0.020734f, 4.881495f, 0.002799f, 0.000342f, -0.019623f, - 1.786154f, 0.037462f, -0.019037f, 0.052833f, 11.408153f, -0.044602f, - 0.026155f, -0.518627f, -0.474499f, -0.427430f, -0.442733f, -0.011116f, - -22.379410f, -0.000549f, -0.001418f, 0.008090f, -0.295090f, -0.230268f, - -0.337278f, -0.001127f, -0.644282f, -0.598783f, -0.539417f, -0.003303f, - 9.189824f, 0.038066f, -0.004097f, -0.460045f, -0.308858f, -0.242691f, - -0.230835f, -0.273057f, 0.152226f, 0.179239f, -0.146382f, -0.004655f, - -0.242940f, -0.718862f, -0.001685f, -0.214736f, 3.263186f, 0.079463f, - -0.003854f, -0.187461f, -0.599144f, -0.419808f, -0.000597f, -0.136980f, - 0.184813f, -0.319525f, -0.007246f, 0.079709f, -0.883229f, -0.343748f, - -0.000077f, -0.172214f, -0.548759f, -0.194674f, -0.144786f, 0.043896f, - -0.176364f, -0.248394f, -0.090215f, -0.294743f, -0.280980f, -0.181436f, - -0.115681f, -0.071915f, -13.035494f, -0.075623f, 0.017052f, -0.171152f, - 5.910803f, 0.128344f, 0.010256f, -1.073301f, 2.387826f, 0.166183f, - -0.007193f, -0.257836f, - }; - -static const float av1_partition_breakout_nn_bias_128_layer0[32] = { - 0.115591f, -0.100178f, -0.165523f, -0.122997f, 11.045759f, 1.034761f, - -0.323672f, -0.189087f, 2.850950f, 7.010029f, -21.447067f, 1.877031f, - 0.437442f, 5.929414f, -0.117274f, 4.462253f, -0.135198f, -0.145927f, - 8.727211f, 0.000000f, -3.532987f, -0.405898f, 11.364439f, -0.141728f, - -5.994947f, -0.362574f, 1.857687f, -0.100400f, -0.130312f, 0.006080f, - 0.429660f, -8.439470f, -}; - -static const float av1_partition_breakout_nn_weights_128_layer1[32] = { - -0.013738f, 0.022052f, -0.074437f, -0.211377f, 
-0.080433f, 0.015543f, - 0.002091f, 0.014252f, 0.134834f, 0.190263f, 0.244175f, -0.031747f, - 0.020068f, -0.068326f, 0.185471f, 0.660268f, -0.134898f, -0.010376f, - -0.276023f, -0.282921f, -0.022769f, 0.007070f, -0.186235f, 0.024407f, - -0.024837f, 0.005764f, 0.016599f, -0.040077f, 0.020990f, 0.095054f, - -0.039662f, 0.131499f, -}; - -static const float av1_partition_breakout_nn_bias_128_layer1[1] = { - 0.86678213f, -}; - -static const NN_CONFIG av1_partition_breakout_nnconfig_128 = { - FEATURE_SIZE, // num_inputs - 1, // num_outputs - 1, // num_hidden_layers - { - 32, // num_hidden_nodes - }, - { - av1_partition_breakout_nn_weights_128_layer0, - av1_partition_breakout_nn_weights_128_layer1, - }, - { - av1_partition_breakout_nn_bias_128_layer0, - av1_partition_breakout_nn_bias_128_layer1, - }, -}; - -static const float - av1_partition_breakout_nn_weights_64_layer0[FEATURE_SIZE * 16] = { - 0.872892f, -0.235539f, -0.412159f, -0.142533f, -2.251479f, -0.057073f, - -0.001373f, 0.112147f, 5.281734f, 0.060704f, 0.000838f, -0.961554f, - 0.244995f, 0.154515f, -0.292654f, -0.167177f, -3.759112f, -0.486347f, - 0.003208f, -0.418226f, 2.618152f, 0.026832f, 0.003988f, -0.404406f, - -0.405434f, 0.102791f, -0.033406f, -0.029820f, -4.492342f, -0.154291f, - 0.012947f, -0.195075f, 0.009311f, -0.411410f, -0.010986f, -0.554822f, - 0.160576f, 0.020796f, -0.457230f, -0.191111f, -7.759542f, -0.065039f, - -0.001322f, 0.055691f, 0.291924f, -0.053076f, -0.148379f, -0.298383f, - 1.022023f, -0.033668f, -0.000804f, -0.825778f, -3.902254f, -0.085812f, - -0.052520f, -0.035012f, -0.465468f, -0.319231f, -0.497529f, -0.183068f, - -2.407131f, -0.062304f, 0.000874f, 0.108786f, - }; - -static const float av1_partition_breakout_nn_bias_64_layer0[16] = { - 0.081425f, -14.404084f, 11.511393f, -0.930053f, 1.841889f, 15.020920f, - -1.872288f, 5.392535f, -0.329335f, -0.005358f, 12.600776f, 0.000000f, - -0.337413f, 4.492778f, 0.000000f, 17.043072f, -}; - -static const float 
av1_partition_breakout_nn_weights_64_layer1[16] = { - -0.465338f, -0.103023f, -0.174808f, -0.005156f, -0.016366f, -0.172494f, - 0.014185f, 0.067030f, -0.001939f, -0.175049f, 0.245992f, -0.181660f, - -0.038572f, 0.307899f, -0.294283f, 0.118323f, -}; - -static const float av1_partition_breakout_nn_bias_64_layer1[1] = { - -1.33438122f, -}; - -static const NN_CONFIG av1_partition_breakout_nnconfig_64 = { - FEATURE_SIZE, // num_inputs - 1, // num_outputs - 1, // num_hidden_layers - { - 16, // num_hidden_nodes - }, - { - av1_partition_breakout_nn_weights_64_layer0, - av1_partition_breakout_nn_weights_64_layer1, - }, - { - av1_partition_breakout_nn_bias_64_layer0, - av1_partition_breakout_nn_bias_64_layer1, - }, -}; - -static const float - av1_partition_breakout_nn_weights_32_layer0[FEATURE_SIZE * 16] = { - -4.825528f, -0.145737f, 0.001907f, 0.145415f, -1.858153f, -0.080744f, - 0.000601f, 0.211991f, 0.384265f, -0.043945f, -0.521332f, -0.170622f, - -0.046866f, -0.600506f, -0.001216f, -0.332760f, -0.447677f, -0.605844f, - -0.121008f, -0.119936f, -0.215739f, -0.269665f, -0.668587f, 0.071318f, - -1.202551f, -0.729727f, -0.370084f, 0.088215f, -1.926800f, -0.086519f, - 0.000359f, 0.215120f, 0.718749f, 0.022942f, 0.003840f, -0.176518f, - 1.213451f, 0.080786f, 0.001557f, -1.053430f, 0.202698f, -0.583919f, - -0.535512f, -0.239927f, -0.110151f, -0.128832f, -0.441087f, -0.145575f, - -0.178518f, -0.585784f, 0.000029f, -0.833014f, -0.331358f, -0.520297f, - -0.088676f, -0.178487f, -1.430755f, 0.022981f, -0.106931f, 0.015573f, - -0.520814f, -0.045386f, -0.443123f, -0.484209f, - }; - -static const float av1_partition_breakout_nn_bias_32_layer0[16] = { - 11.747026f, -9.337718f, 0.341648f, -0.155847f, -0.104005f, 4.666283f, - 6.669584f, 16.625504f, 9.885626f, 15.439183f, -0.346080f, 0.000000f, - -0.423808f, 0.000000f, 6.352258f, -0.155787f, -}; - -static const float av1_partition_breakout_nn_weights_32_layer1[16] = { - 0.168561f, -0.122519f, 0.524667f, 0.032474f, 0.059097f, 0.011900f, - 
0.166445f, 0.127256f, -0.034838f, -0.212586f, -0.317973f, 0.348419f, - -0.004171f, 0.157694f, 0.117845f, 0.272115f, -}; - -static const float av1_partition_breakout_nn_bias_32_layer1[1] = { - 0.09049262f, -}; - -static const NN_CONFIG av1_partition_breakout_nnconfig_32 = { - FEATURE_SIZE, // num_inputs - 1, // num_outputs - 1, // num_hidden_layers - { - 16, // num_hidden_nodes - }, - { - av1_partition_breakout_nn_weights_32_layer0, - av1_partition_breakout_nn_weights_32_layer1, - }, - { - av1_partition_breakout_nn_bias_32_layer0, - av1_partition_breakout_nn_bias_32_layer1, - }, -}; - -static const float - av1_partition_breakout_nn_weights_16_layer0[FEATURE_SIZE * 16] = { - 0.209371f, 0.028758f, 0.005764f, -0.384401f, -0.625777f, -0.005647f, - -0.316867f, 0.042985f, 0.127344f, 0.025461f, 0.011465f, -0.071043f, - -0.295977f, -0.076093f, -0.209681f, -0.311653f, -0.147538f, 0.009910f, - -0.130997f, -0.012326f, 0.024124f, -0.323578f, -0.005790f, -0.085664f, - -1.575066f, -0.119221f, 0.015018f, 0.187204f, 0.238117f, 0.084924f, - -0.004444f, -1.271538f, -0.709860f, -0.006226f, -0.903111f, 0.090573f, - -0.278642f, -0.011114f, 0.021162f, 0.081290f, -0.467486f, -0.040771f, - -0.224069f, -0.714390f, -0.281905f, -0.001336f, -0.761212f, -0.060385f, - -0.814479f, -0.050450f, -0.003666f, 0.085668f, -0.272589f, 0.057330f, - -0.206540f, -0.303418f, 0.075335f, -0.180468f, -0.064872f, -0.755948f, - -0.509287f, -0.048877f, -0.001512f, 0.077086f, - }; - -static const float av1_partition_breakout_nn_bias_16_layer0[16] = { - 16.421495f, 4.012273f, -1.828571f, 0.000000f, -0.263564f, -0.201972f, - 6.564987f, 14.651000f, -3.227779f, 2.241833f, -0.137116f, 0.762876f, - 5.625762f, 0.615822f, 0.040057f, 16.668884f, -}; - -static const float av1_partition_breakout_nn_weights_16_layer1[16] = { - -0.096440f, 0.184316f, -0.021148f, 0.424974f, 0.003743f, 0.006310f, - 0.046266f, -0.219224f, -0.087004f, 0.024623f, -0.275798f, 0.120164f, - 0.269773f, -0.021105f, -0.146698f, 0.188764f, -}; - -static 
const float av1_partition_breakout_nn_bias_16_layer1[1] = { - 1.60751927f, -}; - -static const NN_CONFIG av1_partition_breakout_nnconfig_16 = { - FEATURE_SIZE, // num_inputs - 1, // num_outputs - 1, // num_hidden_layers - { - 16, // num_hidden_nodes - }, - { - av1_partition_breakout_nn_weights_16_layer0, - av1_partition_breakout_nn_weights_16_layer1, - }, - { - av1_partition_breakout_nn_bias_16_layer0, - av1_partition_breakout_nn_bias_16_layer1, - }, -}; - -static const float - av1_partition_breakout_nn_weights_8_layer0[FEATURE_SIZE * 16] = { - -0.255885f, 0.109548f, -0.111054f, -0.476119f, -1.083031f, -0.342003f, - 0.048241f, -0.356013f, -0.085054f, 0.124908f, 0.000084f, -0.149906f, - -0.729829f, 0.133535f, -0.002125f, 0.207516f, -0.210163f, -0.567365f, - -0.590103f, 0.045308f, -0.539406f, 0.130550f, -0.663879f, -0.170549f, - 0.017587f, -0.054187f, 0.000550f, 0.038297f, -0.112891f, -0.012751f, - -0.048067f, 0.095564f, 0.079892f, 0.077285f, -0.749708f, -0.286312f, - -0.054334f, 0.132242f, -0.004152f, -0.209758f, -0.073407f, 0.082306f, - -0.001034f, -0.090990f, 0.122823f, -0.109794f, -0.230066f, -0.391155f, - -0.262245f, -0.004744f, -0.232246f, 0.099290f, -0.637484f, 0.111937f, - -0.548556f, -0.598344f, 0.123265f, -0.281395f, -0.399711f, -0.525671f, - -0.596269f, 0.098494f, -0.005765f, 0.173652f, - }; - -static const float av1_partition_breakout_nn_bias_8_layer0[16] = { - 0.194141f, -0.111223f, 2.503733f, -7.155602f, -0.695068f, 0.114874f, - 2.056990f, 5.284306f, 0.639643f, -2.792049f, -2.232339f, -0.232209f, - 2.336705f, -0.278834f, 0.231905f, 7.954366f, -}; - -static const float av1_partition_breakout_nn_weights_8_layer1[16] = { - -0.014439f, 0.010171f, 0.048116f, -0.090659f, -0.081235f, -0.021840f, - -0.017360f, 0.031063f, -0.031737f, -0.023439f, -0.037725f, 0.021954f, - 0.055858f, 0.230970f, -0.056466f, 0.119780f, -}; - -static const float av1_partition_breakout_nn_bias_8_layer1[1] = { - 1.27784479f, -}; - -static const NN_CONFIG 
av1_partition_breakout_nnconfig_8 = { - FEATURE_SIZE, // num_inputs - 1, // num_outputs - 1, // num_hidden_layers - { - 16, // num_hidden_nodes - }, - { - av1_partition_breakout_nn_weights_8_layer0, - av1_partition_breakout_nn_weights_8_layer1, - }, - { - av1_partition_breakout_nn_bias_8_layer0, - av1_partition_breakout_nn_bias_8_layer1, - }, -}; -#undef FEATURE_SIZE - -#define FEATURE_SIZE 9 // Input layer size -#define NUM_NODES 32 // Hidden layer size -#define LABEL_SIZE 3 // Output layer size - -static const float av1_rect_partition_nn_weights_8_layer0[FEATURE_SIZE * - NUM_NODES] = { - 0.22151f, 0.99424f, 0.23415f, -1.13841f, -0.11277f, 0.09530f, 0.14769f, - -1.18895f, -0.96640f, -0.21421f, -0.13974f, 0.03236f, 0.15777f, -0.03176f, - 0.02729f, -0.37344f, -0.01727f, -0.05469f, 0.19402f, -3.45508f, 0.90106f, - -2.91557f, 0.19379f, 0.14356f, -0.13291f, 0.05734f, -0.03032f, -0.13060f, - 0.35744f, 1.31630f, -1.54493f, -0.20749f, -0.24413f, -0.04524f, -0.12400f, - 1.08305f, -0.21596f, 0.76244f, 1.10616f, -1.71706f, 0.05768f, 0.10966f, - 0.00949f, -0.12680f, 0.00699f, -0.11522f, -0.38566f, 0.34283f, -0.35266f, - -0.40643f, -0.22462f, 0.32300f, -0.39737f, -0.20587f, -0.16096f, 1.07543f, - 0.30314f, -1.35659f, -0.38212f, 0.45857f, 0.76615f, 0.16819f, -1.24459f, - 0.39677f, 0.87436f, -2.33757f, 1.27471f, 0.27488f, 0.01019f, -0.01221f, - -0.07461f, -0.14577f, -0.01231f, -0.64426f, -1.02733f, -1.96242f, 0.95143f, - -0.06777f, -1.13868f, 0.01354f, -0.75590f, -0.78222f, -0.07453f, 0.61788f, - 0.56899f, 1.17144f, 0.70899f, 0.48568f, 0.11266f, 0.81579f, -0.03929f, - 0.01088f, 0.33599f, -0.22401f, -0.49654f, -0.02598f, 0.04509f, -0.08217f, - -0.30687f, 0.19851f, -2.96860f, -2.30698f, 0.01848f, 0.11801f, 0.06614f, - 0.01673f, -0.11002f, -0.08168f, 0.09204f, -0.06379f, 0.27972f, -0.31716f, - -0.00566f, -0.13651f, -0.37276f, 0.01511f, -0.23697f, 0.21696f, -0.19480f, - 0.60758f, -0.43506f, -0.02247f, -1.45073f, 0.84442f, -0.94018f, 0.32550f, - 0.03985f, -0.06581f, 0.21665f, 
0.79472f, -2.41080f, 0.04788f, -0.09492f, - -0.10677f, 0.07250f, 0.14329f, -0.37319f, 0.53043f, -0.49108f, 0.25792f, - -0.36569f, -0.28669f, -0.18416f, -0.52385f, -1.17081f, -1.32153f, -1.13403f, - -0.26196f, 0.93379f, 0.72115f, 0.54464f, 0.27642f, 0.04757f, 2.01629f, - 1.55787f, -0.11665f, 1.00722f, -0.24352f, 0.53308f, 0.57719f, 0.39344f, - 0.19174f, 0.06339f, -0.02530f, 0.07724f, -0.32416f, -0.26992f, -0.35887f, - -0.35285f, -0.33379f, -0.37475f, -0.77335f, 1.70027f, -1.52153f, -0.26503f, - 0.97552f, -2.96705f, -0.91220f, -0.11827f, 0.00406f, -0.14514f, 0.18417f, - -0.20874f, 0.27293f, -0.34072f, -0.34838f, -0.19054f, -0.29806f, -0.27960f, - -0.19293f, -0.18275f, -0.05902f, 0.58625f, -0.05470f, -0.48814f, -0.45382f, - -0.05959f, 2.01250f, -0.30014f, 0.69546f, -1.24180f, 1.34923f, 0.20337f, - 0.16850f, 0.07187f, 0.72630f, -0.15380f, -2.40973f, -2.73561f, -1.71375f, - -1.61695f, 0.50052f, 0.09730f, 0.00579f, 0.06133f, -0.06512f, -0.61439f, - -1.16173f, -0.58716f, 1.60438f, 0.23242f, 0.91847f, 0.49041f, -0.16277f, - -0.02574f, -0.64593f, 1.17028f, 0.46852f, 0.14926f, 0.73853f, -0.78521f, - 0.05959f, -0.35590f, 0.02039f, 0.10812f, -0.28650f, 1.34038f, -0.72188f, - 0.62385f, -0.35271f, -0.39599f, 0.41543f, 0.53124f, -0.23510f, -0.15480f, - -0.05066f, -0.33529f, 0.05238f, -0.35311f, -0.26983f, -0.39764f, 0.01085f, - 0.26593f, -0.18411f, -0.29945f, 0.50090f, -0.03397f, 0.78562f, -0.33068f, - 1.21308f, -2.23273f, -0.33366f, -0.15164f, -1.13270f, 0.17394f, 0.65567f, - 0.76496f, 0.44325f, 0.01368f, -0.33619f, -0.64256f, 0.64478f, 0.84553f, - 1.74183f, 0.22563f, -0.14550f, -0.16258f, 0.03010f, 0.49922f, 0.64575f, - -0.29187f, -0.10348f, -1.43619f, -0.56540f, -0.14779f, 0.04616f, 0.87411f, - -1.08228f, -}; - -static const float av1_rect_partition_nn_bias_8_layer0[NUM_NODES] = { - 0.33919f, -0.03003f, 0.79073f, -0.18508f, 0.00668f, -0.12017f, 0.35362f, - -0.51642f, 0.06536f, 0.41668f, -0.06509f, 0.94606f, -0.15385f, 0.14936f, - 1.46274f, -0.06961f, 2.82537f, -1.95576f, 
-0.09457f, 0.02042f, -0.07480f, - -0.55083f, 0.26170f, 4.39883f, 0.33999f, -0.10502f, 0.70884f, -0.06992f, - -0.22638f, 1.40940f, -0.09309f, 0.05828f, -}; - -static const float av1_rect_partition_nn_weights_8_layer1[NUM_NODES * - LABEL_SIZE] = { - 0.09209f, 0.26236f, 0.62136f, 0.76324f, -1.14678f, 0.42289f, -0.08895f, - -0.97267f, 2.05958f, 0.00843f, 0.35335f, 1.12096f, -0.11679f, 0.07350f, - -1.23231f, -0.61990f, 1.51379f, -1.99450f, 0.22441f, 2.41974f, -0.30488f, - -0.37869f, 0.47168f, -3.70132f, 0.00061f, 0.19432f, 0.11512f, 0.26200f, - -0.35285f, 0.37985f, 0.90571f, 0.27344f, 0.74840f, -0.17965f, -2.51433f, - 0.59235f, 1.16670f, -0.53446f, 0.67897f, 0.04505f, -0.86874f, 0.45361f, - -0.35033f, 1.21283f, 0.31426f, -0.20841f, 0.56757f, 0.45909f, -1.23683f, - 0.09835f, -0.17214f, -0.96323f, 0.01138f, -0.50233f, 0.30104f, 2.01814f, - 1.15821f, -0.11947f, 0.74574f, -0.30714f, -0.39646f, -1.30086f, -0.88541f, - -0.12259f, -0.54977f, 0.30069f, 1.84299f, -0.95141f, -0.65887f, -0.25888f, - -0.63265f, 1.29531f, -0.56672f, 0.10837f, -0.21297f, -2.19131f, 0.01156f, - 0.51912f, 0.46704f, 0.42810f, -0.59271f, 0.98469f, -0.17914f, -1.91163f, - -0.32807f, 0.48199f, -0.99525f, 1.67108f, -0.87631f, -0.60258f, -0.78731f, - -0.32877f, 0.44237f, 0.01087f, 0.07489f, -0.28224f, -}; - -static const float av1_rect_partition_nn_bias_8_layer1[LABEL_SIZE] = { - 1.70665f, - -0.77954f, - -0.92709f, -}; - -static const NN_CONFIG av1_rect_partition_nnconfig_8 = { - FEATURE_SIZE, // num_inputs - LABEL_SIZE, // num_outputs - 1, // num_hidden_layers - { - NUM_NODES, - }, // num_hidden_nodes - { av1_rect_partition_nn_weights_8_layer0, - av1_rect_partition_nn_weights_8_layer1 }, - { av1_rect_partition_nn_bias_8_layer0, av1_rect_partition_nn_bias_8_layer1 } -}; - -static const float av1_rect_partition_nn_weights_16_layer0[FEATURE_SIZE * - NUM_NODES] = { - -0.18480f, -0.05410f, -0.18957f, 0.15451f, -0.38649f, -0.26162f, -0.22727f, - -0.38555f, -0.36738f, 0.74384f, -1.85999f, 0.98491f, -0.72119f, 
1.77321f, - 0.39983f, 0.96314f, 0.23695f, 0.30200f, 0.30629f, -0.47617f, -1.43320f, - -1.81730f, 0.36554f, -0.07142f, -1.27242f, -1.27697f, 0.00110f, -0.32179f, - 0.27460f, 0.45428f, 0.15308f, -0.73906f, -0.28577f, -0.01238f, -0.16958f, - -0.85390f, 1.05484f, -1.62812f, 0.77632f, -0.27327f, -0.32527f, 0.32726f, - 1.73255f, 0.53763f, 0.59121f, -0.39068f, -0.32451f, -0.31869f, 0.17777f, - 0.07519f, -0.18066f, -0.11250f, -0.14616f, -0.16882f, -0.04099f, -0.67959f, - 0.39674f, -0.08596f, 0.18587f, -2.04097f, -1.73993f, 1.57212f, 1.42410f, - -1.36762f, -0.41485f, -1.12103f, 0.56959f, 0.11500f, 0.48945f, -0.13585f, - 1.22125f, 0.67071f, -1.11812f, -0.20660f, -0.52856f, 0.70663f, 0.74382f, - 0.61114f, -0.11454f, 1.14687f, 0.80322f, -0.45965f, -0.44466f, -0.05830f, - 0.13206f, -0.53750f, -0.11324f, -0.37971f, -0.13491f, -0.21268f, 1.93407f, - 1.34433f, 2.49427f, 2.91955f, 1.71730f, 0.03295f, 0.03587f, -0.14550f, - 0.08189f, -0.38655f, -0.35432f, -0.62706f, -0.01849f, -0.57882f, -0.60438f, - -1.01334f, -0.57302f, 0.22592f, 0.05916f, -0.05305f, -0.89824f, -0.52969f, - -0.24542f, 0.27029f, -0.40924f, -0.82452f, -0.60665f, -5.03025f, 0.83302f, - 1.83695f, 2.19716f, 2.31001f, 0.03657f, 0.00063f, -0.04379f, 0.05835f, - -0.08623f, 0.20557f, -0.17791f, 0.07874f, -0.25456f, -0.19513f, -0.27753f, - -0.31982f, 0.00245f, -0.33183f, 0.26059f, -0.22165f, 0.37582f, -0.30411f, - -0.22639f, -0.14739f, -0.20201f, -0.37507f, -1.30653f, 0.49570f, 1.03673f, - 0.66139f, 0.44941f, -0.44461f, -0.50376f, -0.49664f, 0.18608f, -0.26175f, - 0.14844f, 0.78715f, -0.70344f, -0.87624f, -0.98535f, -0.35346f, 0.37094f, - -0.43135f, -0.22571f, 3.46263f, 3.13580f, -1.33203f, -0.15247f, -0.15866f, - -0.11214f, 0.12211f, 0.03964f, -1.87597f, -4.81597f, -4.80195f, -4.98096f, - -5.62336f, -0.05337f, -0.00943f, 0.00792f, 0.02742f, 1.05679f, 2.41455f, - 0.85382f, 1.42504f, 0.58096f, 0.21443f, 1.02694f, 1.06746f, 1.20242f, - 0.60767f, 1.98667f, -0.80879f, -0.63495f, 1.95508f, 0.23952f, -0.15019f, - -0.16097f, 
0.30155f, -3.42407f, -1.34998f, 9.07689f, -2.22559f, 2.22562f, - -0.03348f, -0.05229f, 0.05931f, 0.03042f, -0.18068f, -0.05732f, -0.33010f, - -0.32279f, -0.26607f, -0.02723f, -0.04067f, 0.08700f, -0.16366f, -0.24935f, - -0.69124f, 0.58508f, 0.50654f, 0.04492f, 1.38340f, -1.51487f, 1.72889f, - -1.95618f, -3.65013f, -1.38525f, -3.05516f, -2.40448f, 2.47467f, 0.03784f, - 0.08052f, -0.01971f, -0.08918f, -0.84997f, -0.55302f, -1.07861f, -0.62626f, - 0.61751f, -0.11012f, -0.24185f, -0.39201f, -1.85390f, -0.31261f, -0.11927f, - 0.15671f, -0.23450f, -0.14916f, -0.31715f, -0.19350f, 0.01795f, -0.11533f, - -0.05799f, -0.03142f, 0.20218f, -0.39499f, -0.33859f, -0.13201f, -0.19527f, - -0.28459f, -0.20346f, 0.89457f, -2.22103f, -2.37455f, -2.00221f, 2.44553f, - 0.33915f, 0.50047f, -0.34625f, -0.19667f, -0.56333f, -0.84328f, 1.25767f, - -1.70297f, 1.00482f, -0.00103f, -1.40813f, 0.21311f, 0.39230f, -0.07302f, - -3.49100f, 1.60675f, -2.90692f, 0.11022f, 0.13507f, -0.13308f, 0.15201f, - -0.05573f, -}; - -static const float av1_rect_partition_nn_bias_16_layer0[NUM_NODES] = { - -0.16783f, -0.16023f, 0.52215f, -0.04109f, 2.00122f, -0.11633f, 0.25535f, - 1.80638f, 1.69273f, -0.25998f, -6.83550f, -0.79682f, -1.03466f, 1.42721f, - 0.00000f, -0.00000f, -0.11665f, -0.12047f, -1.01497f, 7.27181f, -0.78548f, - -1.39335f, -5.42248f, -0.10388f, 0.07634f, 2.81012f, -0.57429f, -0.15629f, - -0.12044f, 1.65478f, -0.75153f, 1.18441f, -}; - -static const float av1_rect_partition_nn_weights_16_layer1[NUM_NODES * - LABEL_SIZE] = { - -0.26407f, 0.06322f, 0.87932f, 0.17772f, 0.71686f, -0.12283f, 0.08454f, - 0.20098f, -0.31763f, -0.33178f, -4.59535f, -0.04367f, 0.17099f, 3.80486f, - 0.16750f, 0.29218f, 0.57234f, -0.96550f, -0.10599f, -4.91130f, -0.14658f, - 0.95803f, -4.13925f, 0.24567f, 0.25708f, 1.60547f, -1.03251f, -0.31053f, - -0.05659f, -0.94121f, -0.68926f, -0.24738f, -0.38019f, 0.98950f, 0.13689f, - 0.24504f, 0.49623f, 0.19980f, 0.38349f, 0.37481f, 0.54540f, -0.02198f, - 3.43385f, 1.02543f, 
-0.40921f, -3.07235f, 0.02996f, 0.00323f, -0.35414f, - 0.71099f, 1.39334f, 2.43741f, -1.11007f, -0.22739f, -4.21757f, 0.11905f, - 0.00353f, -1.69637f, 0.45944f, -0.19884f, 0.03624f, 0.25729f, 0.23659f, - -2.08405f, 0.08573f, -0.53393f, -1.28103f, -0.53970f, -0.65465f, 0.31821f, - -0.09884f, -0.69026f, -0.37284f, 0.04622f, 1.32973f, -0.15414f, 0.19138f, - -0.67927f, -0.17658f, 0.36008f, -0.51832f, 0.09887f, -1.94414f, 2.95227f, - 1.76937f, -0.26687f, 8.50976f, 0.26247f, 0.60262f, -0.27910f, 0.30061f, - -0.05117f, 0.16018f, 0.71195f, 0.57871f, 1.57794f, -}; - -static const float av1_rect_partition_nn_bias_16_layer1[3] = { - 2.68750f, - -1.31894f, - -1.36768f, -}; - -static const NN_CONFIG av1_rect_partition_nnconfig_16 = { - FEATURE_SIZE, // num_inputs - LABEL_SIZE, // num_outputs - 1, // num_hidden_layers - { - NUM_NODES, - }, // num_hidden_nodes - { av1_rect_partition_nn_weights_16_layer0, - av1_rect_partition_nn_weights_16_layer1 }, - { av1_rect_partition_nn_bias_16_layer0, av1_rect_partition_nn_bias_16_layer1 } -}; - -static const float av1_rect_partition_nn_weights_32_layer0[FEATURE_SIZE * - NUM_NODES] = { - -0.54654f, -0.43537f, -0.10620f, -0.48051f, -0.43543f, -0.22737f, -0.15429f, - -0.09858f, -0.09438f, 0.37306f, 0.23934f, -1.86375f, -1.18307f, -0.32995f, - -0.09745f, 0.05431f, -0.13799f, 0.14734f, -0.33219f, 0.18057f, -0.23792f, - -0.28126f, 0.02977f, -0.07431f, 0.07860f, 0.00067f, -0.01927f, 1.01841f, - -0.57739f, 0.08412f, -1.33843f, -1.05563f, -0.28693f, -0.39425f, -0.69572f, - -0.16703f, 0.02808f, 0.11994f, -0.26267f, 0.19706f, -0.29707f, -0.25305f, - -0.07050f, -0.02704f, -0.31528f, -0.42301f, 0.22496f, -0.37001f, -0.23319f, - -0.11139f, -0.30513f, 0.04213f, -0.12550f, 0.02504f, 0.33245f, 0.01102f, - -0.35950f, -0.05949f, -0.19590f, -0.27457f, -0.28339f, -0.15676f, -0.21538f, - 0.65066f, 0.28443f, -1.24943f, -3.00246f, -1.01897f, 0.09304f, 0.70052f, - -0.12877f, 0.21120f, -0.37476f, 0.23261f, -0.28401f, 0.09837f, 0.00020f, - -0.12106f, -0.32354f, 
-0.02472f, -0.19772f, 1.01886f, 0.16596f, -0.06532f, - 1.72938f, 1.57754f, 0.55963f, 0.33246f, -0.20023f, 0.30715f, 0.08629f, - 0.18945f, -0.45988f, -1.22610f, -0.05152f, -0.48859f, -1.02104f, -0.27315f, - -0.57698f, 0.04157f, -0.92428f, -1.31268f, 1.78210f, 0.10291f, 1.55042f, - -1.26793f, 1.39042f, -1.43729f, 0.25600f, 5.21263f, 5.31955f, 5.19316f, - 5.43430f, 0.00294f, -0.00970f, -0.02333f, 0.00250f, 1.17672f, 6.27544f, - 4.95973f, 3.54009f, 4.51269f, 0.30750f, 0.78780f, -0.44741f, -0.76442f, - 0.75050f, 0.58799f, 0.03400f, -2.09859f, 1.67313f, 0.12503f, 0.28609f, - 1.15809f, 2.46530f, -0.04898f, 0.23072f, -0.12635f, -0.82097f, -0.63827f, - 2.16779f, 1.77132f, 0.15434f, -1.06427f, 0.06206f, -0.87732f, -0.61897f, - -0.44593f, -0.77131f, -0.15979f, -0.02282f, -0.74381f, 0.66052f, -0.22992f, - 1.74638f, 1.29199f, -0.55464f, 0.98316f, 0.06665f, 0.50254f, -0.66292f, - 0.17113f, -0.32633f, -1.85803f, -0.92759f, 4.44965f, 1.33057f, 0.02135f, - -0.27446f, -0.26018f, -0.12613f, -0.14470f, -0.23355f, -0.09717f, -0.24123f, - -0.05535f, -0.19146f, -0.36222f, -0.30458f, -0.40323f, 0.21779f, 0.14248f, - -0.48630f, 0.18840f, 0.11040f, 0.17287f, -0.51880f, 1.12466f, -0.38888f, - -0.16421f, -0.31784f, -0.36112f, -0.25386f, -0.01636f, 0.10029f, -0.26881f, - -0.17051f, -0.30903f, -0.08573f, -0.28774f, -0.01173f, -0.09706f, -0.23089f, - -0.12922f, -0.17463f, -0.12433f, -0.23074f, 0.15220f, 1.29826f, 0.23788f, - 0.04189f, 2.66416f, 0.48815f, -0.06803f, 0.96742f, 1.27165f, -0.70348f, - -0.09941f, -0.42948f, -0.20243f, -0.02364f, -0.26689f, -0.40629f, -0.68217f, - -0.48073f, 2.43657f, -2.60191f, -1.82837f, 0.50440f, 0.71829f, 0.76491f, - 0.28293f, 0.20568f, 0.92642f, -0.02496f, 1.43637f, -0.24474f, -1.21030f, - 0.54084f, 1.05130f, 1.29572f, 0.03750f, -0.36894f, 0.74548f, -1.33857f, - -0.84858f, 1.35230f, 0.80175f, 0.66136f, 1.06473f, 0.18701f, 1.42413f, - 0.04661f, -0.07820f, 0.64990f, -0.43595f, 1.18304f, -0.11437f, -0.06365f, - 0.03558f, 0.78260f, -1.74890f, 1.56217f, -1.23424f, 
4.59193f, -3.35072f, - 0.01180f, -0.18296f, -0.20870f, 0.04510f, 1.52595f, -1.37402f, -0.33123f, - -0.85957f, 0.80598f, 0.03743f, 0.02354f, 0.37707f, 1.62095f, -0.29627f, - -0.31778f, -0.45789f, -0.14906f, 0.25315f, -0.10817f, -0.32610f, -0.40890f, - 0.33984f, -}; - -static const float av1_rect_partition_nn_bias_32_layer0[NUM_NODES] = { - -0.17482f, 0.39042f, 0.00000f, 1.69677f, 0.08792f, -0.09301f, 0.13809f, - 4.84061f, 0.00000f, 0.40515f, 0.46246f, 0.20644f, -5.77478f, -1.54510f, - 0.05660f, -0.32013f, 0.23649f, 0.03778f, -2.53710f, -0.27869f, 0.45623f, - -0.04155f, -0.18445f, -0.73405f, -0.50243f, 2.23191f, 1.93272f, -1.07032f, - -0.27602f, -1.98063f, 0.20816f, -0.01315f, -}; - -static const float av1_rect_partition_nn_weights_32_layer1[NUM_NODES * - LABEL_SIZE] = { - 0.02827f, 1.02560f, -0.07137f, -0.31911f, 0.11365f, 0.13684f, -0.07816f, - -5.23036f, -0.34340f, 0.84526f, -1.51845f, 0.07017f, -8.12570f, 6.24061f, - 0.35739f, -0.09937f, -0.30978f, 0.22032f, 0.74968f, -0.34557f, 0.45547f, - -0.16512f, 0.07118f, 1.66415f, 0.41320f, -1.81533f, -1.96004f, 1.04666f, - 0.84049f, 4.31009f, 0.68850f, 0.26322f, -0.24634f, -1.25889f, 0.31952f, - 0.63632f, 0.05801f, -0.10664f, -0.21992f, 2.44386f, 0.19526f, -0.09838f, - 1.53049f, -0.26630f, 3.54126f, -3.40574f, 0.72730f, 0.04557f, 0.92652f, - 0.15522f, 2.35895f, -0.13347f, 0.56907f, 0.15352f, 0.01823f, -0.73939f, - 0.43104f, 1.90321f, 0.31267f, -0.51972f, 0.50094f, -3.98372f, -3.41518f, - -0.48183f, 0.26661f, 0.64146f, 0.14500f, -0.01695f, 0.16653f, -0.37846f, - 0.08412f, 2.69714f, -0.20258f, -0.75786f, 0.11201f, 0.61878f, 4.22231f, - -3.55330f, -1.14137f, -0.37722f, -0.28000f, -0.72581f, -2.62827f, -0.19448f, - -0.59398f, -0.30136f, -0.17725f, -0.69630f, -0.41132f, 0.12208f, 2.11441f, - -1.08794f, -1.41694f, 0.02620f, 2.18792f, 0.04271f, -}; - -static const float av1_rect_partition_nn_bias_32_layer1[3] = { - 2.47332f, - -1.65756f, - -0.81573f, -}; - -static const NN_CONFIG av1_rect_partition_nnconfig_32 = { - 
FEATURE_SIZE, // num_inputs - LABEL_SIZE, // num_outputs - 1, // num_hidden_layers - { - NUM_NODES, - }, // num_hidden_nodes - { av1_rect_partition_nn_weights_32_layer0, - av1_rect_partition_nn_weights_32_layer1 }, - { av1_rect_partition_nn_bias_32_layer0, av1_rect_partition_nn_bias_32_layer1 } -}; - -static const float av1_rect_partition_nn_weights_64_layer0[FEATURE_SIZE * - NUM_NODES] = { - 0.08972f, 4.09095f, -0.31398f, -2.43631f, -0.74767f, 1.42471f, 1.60926f, - 1.44721f, 1.88259f, 2.35375f, 1.88299f, 2.01109f, 0.98679f, 2.24131f, - 0.06279f, -0.08315f, 0.32107f, 0.91334f, -0.36569f, 5.55049f, 5.44943f, - 5.20471f, 5.39099f, -0.01943f, -0.00284f, 0.02203f, -0.01309f, 1.41917f, - 6.68460f, -6.15986f, 6.41341f, -3.20630f, -0.00567f, -0.00038f, 0.05960f, - 0.04308f, 0.95366f, 3.48535f, 2.98266f, 4.11784f, 3.44255f, 0.61630f, - 0.71405f, 0.63945f, -0.00713f, 0.39193f, 1.91621f, 3.32755f, 0.71674f, - -0.11647f, 2.07090f, 2.64191f, 0.07949f, -0.05023f, 0.99935f, 0.83145f, - 0.75898f, -0.98764f, -0.58731f, 1.21734f, -0.08076f, -3.26780f, 1.66278f, - 0.04189f, -0.33177f, -1.58648f, 1.00883f, -0.56132f, -2.34877f, 0.67056f, - -2.32297f, -0.91641f, -1.02909f, 4.19781f, 3.87484f, 4.32778f, -1.97171f, - -0.24734f, 0.00822f, 0.05892f, 0.12697f, -3.62915f, -2.93127f, 7.94856f, - -3.29311f, 3.26001f, -0.02231f, 0.02741f, 0.05919f, 0.08190f, -1.49344f, - -0.64475f, -0.24627f, 4.03324f, -1.14799f, -0.18465f, -0.17829f, 0.10394f, - 0.08580f, -5.74721f, 4.42467f, 3.63964f, 3.00258f, -1.22744f, -0.29408f, - 0.00767f, 0.12305f, 0.05249f, -0.17166f, -0.20120f, -0.32941f, -0.31901f, - 0.04628f, -0.35249f, -0.18272f, 0.03956f, -0.19329f, -0.33564f, 0.09856f, - -0.00173f, -0.31751f, -0.05702f, -0.20558f, -0.31464f, -0.02488f, -0.00729f, - -0.35854f, -0.14762f, -0.34897f, -0.12746f, 0.04011f, -0.24918f, -0.53516f, - -0.28440f, -0.36789f, -1.34889f, -9.10044f, -9.19238f, 4.48042f, 6.54429f, - -0.00226f, 0.00430f, 0.00321f, 0.00442f, 0.87551f, -0.16224f, -0.22832f, - -0.60640f, -0.28738f, 
0.18062f, 0.22008f, -0.47406f, 0.80302f, 0.12149f, - 1.49530f, 1.05069f, -2.02985f, -0.92833f, 0.25616f, 0.12852f, 3.51840f, - 0.25226f, -2.63283f, -4.04386f, 8.46300f, -2.93408f, 0.44069f, 0.08276f, - 0.34482f, -0.22615f, 0.28666f, 3.02962f, -1.20055f, -1.04832f, -0.97632f, - -0.99530f, 1.44196f, 1.68550f, 0.49360f, 1.08155f, -0.26059f, -0.02876f, - -0.27492f, -0.06205f, -0.09496f, -0.12314f, -0.30228f, -0.07453f, -0.38857f, - 1.17443f, 2.41497f, 1.90537f, 2.37716f, 2.91495f, -0.44455f, -0.51176f, - 0.48195f, 0.53032f, 0.23696f, -1.06211f, 1.47459f, -0.89029f, 0.29521f, - 0.66291f, -0.42653f, 1.82308f, -1.30372f, -0.36192f, -3.40388f, -1.61476f, - -2.29745f, -0.66886f, -2.08252f, -0.54552f, -4.06849f, 0.02948f, 0.27297f, - -4.81472f, 4.60404f, -0.11053f, 0.14765f, 0.02826f, -0.14688f, -0.07066f, - -0.01224f, 1.20377f, 7.02725f, -6.02627f, 6.87255f, -3.14257f, 0.01074f, - 0.02397f, -0.02359f, 0.01901f, 0.14956f, -1.67671f, 2.26714f, 2.57043f, - -0.45888f, -1.60265f, -2.11475f, -2.74029f, -2.74658f, -0.35630f, -2.63013f, - -2.14814f, -0.67266f, -1.56850f, 0.57137f, -1.14428f, -0.34265f, -0.12521f, - 0.01220f, -0.74906f, -0.19270f, 0.68110f, -0.24737f, -0.70568f, -1.64826f, - -0.35847f, -0.15984f, -1.17932f, -8.72306f, -8.72834f, 3.93701f, 6.17812f, - -0.03191f, -0.00104f, 0.01402f, -0.00046f, -0.94517f, 1.51266f, -0.56318f, - 0.72260f, -0.09253f, -0.09069f, -2.16695f, -0.23653f, 0.24418f, 2.21148f, - -1.47954f, -1.01439f, 0.31536f, 0.77238f, -0.85083f, -0.15758f, -0.50886f, - 0.09101f, -}; - -static const float av1_rect_partition_nn_bias_64_layer0[NUM_NODES] = { - 0.91706f, -1.31328f, -5.16196f, 1.13191f, -0.98044f, -1.61122f, 1.03039f, - -0.98537f, -4.45568f, -4.34802f, -0.92116f, 0.66836f, -0.10752f, -0.13065f, - -0.35567f, -0.35693f, 1.74941f, 1.17379f, -3.45555f, 5.66321f, -0.24917f, - -1.11940f, -0.73656f, -0.19299f, -0.04181f, 1.11010f, -2.97859f, -0.16774f, - 0.59835f, -0.31269f, -0.30585f, -1.66212f, -}; - -static const float 
av1_rect_partition_nn_weights_64_layer1[NUM_NODES * - LABEL_SIZE] = { - 0.58963f, 4.20320f, -8.62465f, -6.54014f, 5.41108f, 2.33581f, -0.10354f, - -1.17753f, -3.45909f, -2.24722f, 2.20881f, 3.21971f, -0.09087f, -0.21624f, - 0.16529f, -8.40985f, -1.60205f, -1.41538f, 4.41826f, -4.63069f, -0.27742f, - 4.08710f, 0.26439f, -1.46028f, 0.51234f, 6.25212f, -3.35650f, -1.21348f, - 1.37201f, 8.89151f, 0.28859f, -0.97328f, -0.36196f, -2.71701f, 4.54196f, - -0.62476f, -2.43814f, -1.34209f, 0.12850f, 1.73859f, 3.09809f, -4.42434f, - -1.82552f, -3.66420f, -0.31535f, 0.00968f, -0.02019f, 9.66824f, 0.58835f, - 1.50425f, 2.84487f, 2.55522f, 0.01409f, -2.27594f, -0.31800f, 0.91076f, - -0.66808f, 0.33120f, -0.12460f, 0.64457f, -0.36416f, -10.30843f, 1.51013f, - 2.06861f, -0.20989f, -0.87119f, 3.68642f, 7.33662f, -2.88037f, -0.52414f, - -0.35036f, -0.45947f, -0.07406f, 6.46346f, -0.16031f, 0.27071f, 0.38845f, - -0.21940f, 0.08583f, -1.39526f, 0.50554f, 0.45279f, -6.61856f, 1.84069f, - -0.19149f, -1.77235f, 0.75136f, 1.11797f, 0.32677f, -7.10427f, 3.82908f, - 1.04238f, -0.91435f, 1.93317f, -1.84946f, -0.48909f, -}; - -static const float av1_rect_partition_nn_bias_64_layer1[3] = { - 0.32215f, - -0.57522f, - 0.25314f, -}; - -static const NN_CONFIG av1_rect_partition_nnconfig_64 = { - FEATURE_SIZE, // num_inputs - LABEL_SIZE, // num_outputs - 1, // num_hidden_layers - { - NUM_NODES, - }, // num_hidden_nodes - { av1_rect_partition_nn_weights_64_layer0, - av1_rect_partition_nn_weights_64_layer1 }, - { av1_rect_partition_nn_bias_64_layer0, av1_rect_partition_nn_bias_64_layer1 } -}; - -static const float av1_rect_partition_nn_weights_128_layer0[FEATURE_SIZE * - NUM_NODES] = { - -0.70901f, -3.03481f, 3.30604f, -1.28803f, -0.08610f, -0.33320f, -0.30716f, - 0.25100f, 0.14323f, -0.98422f, -0.89084f, -0.24508f, -1.10785f, -0.82524f, - 0.11766f, -0.42777f, 1.08965f, 4.35125f, -1.19388f, 4.22042f, 4.96306f, - 6.32406f, 3.29899f, -0.90768f, 0.05203f, 0.38467f, 1.74257f, -0.19918f, - -0.11335f, 
0.00140f, -0.42303f, -0.04419f, 0.03583f, -0.05441f, -0.19586f, - 0.01484f, -1.19964f, 0.25497f, 3.04502f, 0.05446f, -0.23253f, 0.00266f, - 0.07117f, -2.78986f, -4.62953f, 1.45331f, 0.43923f, 0.92298f, -0.47736f, - 1.49165f, 0.45942f, -1.99787f, 3.33510f, 0.17234f, 0.04024f, -1.42780f, - 0.23566f, -0.90970f, 1.18041f, -1.45865f, 2.30878f, -1.28507f, 1.87290f, - 1.91186f, 4.74826f, -3.70735f, 4.49808f, -4.72275f, -0.02696f, -0.02642f, - -0.06093f, -0.01121f, -0.70683f, 2.69737f, -1.88563f, 2.48637f, 1.10922f, - 0.74624f, 0.40308f, 2.06396f, 1.39289f, 0.00909f, -2.05271f, -1.53539f, - -1.38323f, 0.83303f, -0.32250f, 0.51172f, 3.91249f, 1.66373f, 1.13184f, - -2.22874f, -1.13448f, -0.11185f, 0.19387f, 0.36770f, -0.58933f, 0.22789f, - 1.17307f, 0.77461f, 0.20817f, 0.33417f, 0.54037f, 0.32961f, -0.18456f, - -9.78171f, -0.17216f, -3.44703f, -2.42158f, 0.51946f, 4.35949f, -0.73335f, - -1.61515f, -0.29622f, -0.37617f, -0.42316f, 0.74922f, 1.44386f, 3.92704f, - -3.76274f, 4.19775f, -3.86958f, 0.00074f, -0.02418f, -0.12944f, 0.05857f, - -0.85507f, 5.42546f, 5.40338f, 5.54347f, 5.59791f, -0.01611f, 0.01618f, - -0.01654f, -0.00270f, -0.39608f, -0.40410f, -0.24551f, 0.09124f, -0.34413f, - -0.11504f, 0.12793f, -0.31523f, 0.09148f, -0.08567f, -0.05140f, -0.13310f, - -0.81200f, 0.06882f, -0.52537f, -12.74048f, -0.45395f, -4.04775f, -1.84887f, - -1.02573f, 0.32788f, 1.06828f, -1.25503f, -0.42693f, 2.01413f, -2.29103f, - 0.62271f, 1.11764f, -1.83113f, -1.32325f, -1.65651f, -2.87826f, 1.46910f, - 0.60885f, 0.16079f, 0.00171f, -0.25658f, -0.25465f, -0.14149f, 0.19497f, - -0.07866f, -0.37080f, -0.05778f, -0.08870f, -0.20491f, 0.84521f, -0.18214f, - -1.38441f, -1.08932f, -1.76627f, 0.73172f, 0.05967f, 1.28057f, 3.42722f, - 1.69287f, 0.77169f, 0.44528f, 1.85513f, 0.07840f, 1.31252f, 2.89948f, - 1.49489f, 0.15281f, 0.54708f, -1.14185f, -2.51063f, 0.36618f, -0.55322f, - 0.96671f, 1.59470f, 1.38252f, 1.99697f, 0.03266f, -0.23200f, -0.01127f, - -0.18918f, -0.37598f, -0.03119f, -0.36039f, 
-0.21192f, -0.11565f, -4.22635f, - 1.41252f, 0.56608f, -0.08867f, 3.11924f, -0.54597f, -0.12504f, -0.05289f, - -0.28665f, -0.58297f, -1.18362f, -0.76201f, -1.22011f, -0.58756f, 0.14740f, - 1.43971f, 0.98381f, -0.02998f, -0.40678f, -0.23047f, -0.12979f, 0.04003f, - -0.22081f, -0.09294f, -0.15955f, -0.10379f, -0.10192f, -1.51316f, 2.39482f, - -1.69975f, 3.58976f, -0.91032f, -0.03498f, 0.48982f, -0.13418f, 0.76256f, - 1.61003f, -2.01676f, -1.24430f, -3.25763f, 1.12314f, 2.00740f, 0.04613f, - -0.14746f, -0.57374f, 3.44511f, -0.56767f, -4.08432f, -2.04894f, 2.35951f, - -0.00458f, 0.18512f, 0.09916f, -0.04084f, -1.56207f, 1.38034f, 4.17302f, - -1.47326f, -2.03530f, -0.00210f, 0.27469f, -0.17423f, 0.86860f, 2.76195f, - 2.43269f, -3.57331f, 2.08715f, -1.44171f, -0.17389f, 2.26157f, -0.07852f, - 2.02519f, -}; - -static const float av1_rect_partition_nn_bias_128_layer0[NUM_NODES] = { - 2.53427f, 1.66678f, -0.84914f, -0.15070f, -1.74769f, 0.45218f, -0.26067f, - 2.05916f, 0.08978f, 5.30984f, 2.66243f, -1.62740f, 0.70018f, 1.96403f, - -4.97152f, -0.05425f, -3.84474f, -1.28006f, 3.47490f, -0.08373f, 0.00225f, - -1.40692f, -0.27569f, -0.30253f, 0.77377f, -0.67636f, -0.26379f, 1.82348f, - 0.66120f, 0.61119f, -1.42293f, 0.32676f, -}; - -static const float av1_rect_partition_nn_weights_128_layer1[NUM_NODES * - LABEL_SIZE] = { - 1.53453f, -0.23707f, 7.88368f, 0.33340f, 0.97523f, 1.38538f, -0.16746f, - 4.42070f, 3.18678f, -5.03545f, -2.27029f, -3.75719f, -0.26850f, -4.93432f, - -8.75673f, 0.27398f, -5.77882f, -0.91616f, -2.62725f, -0.23961f, 0.31249f, - 3.32134f, 0.25375f, -0.00394f, 2.30213f, -0.14183f, 0.14544f, -1.42830f, - 1.31101f, 3.99389f, -0.00017f, -2.90184f, -2.11444f, 2.16734f, -3.05133f, - 0.39206f, 4.61489f, -2.88181f, -0.47745f, 2.86649f, -1.20621f, 3.70550f, - 1.58029f, -4.58731f, -2.29350f, -0.76930f, 5.19135f, -0.22521f, -5.08782f, - 2.17316f, 1.30563f, 0.16777f, -2.17767f, -2.09904f, 1.37001f, 0.25091f, - -1.76743f, 1.57940f, 0.30544f, -2.39895f, -0.08532f, 
-1.77122f, 1.84010f, - -0.88449f, 0.79299f, -1.35368f, -4.54110f, 0.02244f, -5.11580f, 1.60883f, - 0.29352f, -6.47042f, -1.81426f, 1.24013f, 0.90980f, 7.93977f, 2.12555f, - 5.24720f, 4.19508f, 0.21499f, 11.06045f, -0.74752f, 0.89396f, 0.26422f, - 1.72332f, -1.25113f, -1.71136f, 0.13676f, -0.07867f, -0.96929f, 0.19911f, - 3.58233f, -0.76470f, -2.24162f, -2.87465f, 3.18736f, -}; - -static const float av1_rect_partition_nn_bias_128_layer1[3] = { - 1.09014f, - -0.53317f, - -0.55668f, -}; - -static const NN_CONFIG av1_rect_partition_nnconfig_128 = { - FEATURE_SIZE, // num_inputs - LABEL_SIZE, // num_outputs - 1, // num_hidden_layers - { - NUM_NODES, - }, // num_hidden_nodes - { av1_rect_partition_nn_weights_128_layer0, - av1_rect_partition_nn_weights_128_layer1 }, - { av1_rect_partition_nn_bias_128_layer0, - av1_rect_partition_nn_bias_128_layer1 } -}; -#undef FEATURE_SIZE -#undef NUM_NODES -#undef LABEL_SIZE - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_PARTITION_MODEL_WEIGHTS_H_ diff --git a/third_party/aom/av1/encoder/pickcdef.c b/third_party/aom/av1/encoder/pickcdef.c deleted file mode 100644 index 6d154a7d2..000000000 --- a/third_party/aom/av1/encoder/pickcdef.c +++ /dev/null @@ -1,526 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include -#include - -#include "config/aom_scale_rtcd.h" - -#include "aom/aom_integer.h" -#include "av1/common/cdef.h" -#include "av1/common/onyxc_int.h" -#include "av1/common/reconinter.h" -#include "av1/encoder/encoder.h" - -#define REDUCED_PRI_STRENGTHS 8 -#define REDUCED_TOTAL_STRENGTHS (REDUCED_PRI_STRENGTHS * CDEF_SEC_STRENGTHS) -#define TOTAL_STRENGTHS (CDEF_PRI_STRENGTHS * CDEF_SEC_STRENGTHS) - -static int priconv[REDUCED_PRI_STRENGTHS] = { 0, 1, 2, 3, 5, 7, 10, 13 }; - -/* Search for the best strength to add as an option, knowing we - already selected nb_strengths options. */ -static uint64_t search_one(int *lev, int nb_strengths, - uint64_t mse[][TOTAL_STRENGTHS], int sb_count, - int fast) { - uint64_t tot_mse[TOTAL_STRENGTHS]; - const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS; - int i, j; - uint64_t best_tot_mse = (uint64_t)1 << 63; - int best_id = 0; - memset(tot_mse, 0, sizeof(tot_mse)); - for (i = 0; i < sb_count; i++) { - int gi; - uint64_t best_mse = (uint64_t)1 << 63; - /* Find best mse among already selected options. */ - for (gi = 0; gi < nb_strengths; gi++) { - if (mse[i][lev[gi]] < best_mse) { - best_mse = mse[i][lev[gi]]; - } - } - /* Find best mse when adding each possible new option. */ - for (j = 0; j < total_strengths; j++) { - uint64_t best = best_mse; - if (mse[i][j] < best) best = mse[i][j]; - tot_mse[j] += best; - } - } - for (j = 0; j < total_strengths; j++) { - if (tot_mse[j] < best_tot_mse) { - best_tot_mse = tot_mse[j]; - best_id = j; - } - } - lev[nb_strengths] = best_id; - return best_tot_mse; -} - -/* Search for the best luma+chroma strength to add as an option, knowing we - already selected nb_strengths options. 
*/ -static uint64_t search_one_dual(int *lev0, int *lev1, int nb_strengths, - uint64_t (**mse)[TOTAL_STRENGTHS], int sb_count, - int fast) { - uint64_t tot_mse[TOTAL_STRENGTHS][TOTAL_STRENGTHS]; - int i, j; - uint64_t best_tot_mse = (uint64_t)1 << 63; - int best_id0 = 0; - int best_id1 = 0; - const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS; - memset(tot_mse, 0, sizeof(tot_mse)); - for (i = 0; i < sb_count; i++) { - int gi; - uint64_t best_mse = (uint64_t)1 << 63; - /* Find best mse among already selected options. */ - for (gi = 0; gi < nb_strengths; gi++) { - uint64_t curr = mse[0][i][lev0[gi]]; - curr += mse[1][i][lev1[gi]]; - if (curr < best_mse) { - best_mse = curr; - } - } - /* Find best mse when adding each possible new option. */ - for (j = 0; j < total_strengths; j++) { - int k; - for (k = 0; k < total_strengths; k++) { - uint64_t best = best_mse; - uint64_t curr = mse[0][i][j]; - curr += mse[1][i][k]; - if (curr < best) best = curr; - tot_mse[j][k] += best; - } - } - } - for (j = 0; j < total_strengths; j++) { - int k; - for (k = 0; k < total_strengths; k++) { - if (tot_mse[j][k] < best_tot_mse) { - best_tot_mse = tot_mse[j][k]; - best_id0 = j; - best_id1 = k; - } - } - } - lev0[nb_strengths] = best_id0; - lev1[nb_strengths] = best_id1; - return best_tot_mse; -} - -/* Search for the set of strengths that minimizes mse. */ -static uint64_t joint_strength_search(int *best_lev, int nb_strengths, - uint64_t mse[][TOTAL_STRENGTHS], - int sb_count, int fast) { - uint64_t best_tot_mse; - int i; - best_tot_mse = (uint64_t)1 << 63; - /* Greedy search: add one strength options at a time. */ - for (i = 0; i < nb_strengths; i++) { - best_tot_mse = search_one(best_lev, i, mse, sb_count, fast); - } - /* Trying to refine the greedy search by reconsidering each - already-selected option. 
*/ - if (!fast) { - for (i = 0; i < 4 * nb_strengths; i++) { - int j; - for (j = 0; j < nb_strengths - 1; j++) best_lev[j] = best_lev[j + 1]; - best_tot_mse = - search_one(best_lev, nb_strengths - 1, mse, sb_count, fast); - } - } - return best_tot_mse; -} - -/* Search for the set of luma+chroma strengths that minimizes mse. */ -static uint64_t joint_strength_search_dual(int *best_lev0, int *best_lev1, - int nb_strengths, - uint64_t (**mse)[TOTAL_STRENGTHS], - int sb_count, int fast) { - uint64_t best_tot_mse; - int i; - best_tot_mse = (uint64_t)1 << 63; - /* Greedy search: add one strength options at a time. */ - for (i = 0; i < nb_strengths; i++) { - best_tot_mse = - search_one_dual(best_lev0, best_lev1, i, mse, sb_count, fast); - } - /* Trying to refine the greedy search by reconsidering each - already-selected option. */ - for (i = 0; i < 4 * nb_strengths; i++) { - int j; - for (j = 0; j < nb_strengths - 1; j++) { - best_lev0[j] = best_lev0[j + 1]; - best_lev1[j] = best_lev1[j + 1]; - } - best_tot_mse = search_one_dual(best_lev0, best_lev1, nb_strengths - 1, mse, - sb_count, fast); - } - return best_tot_mse; -} - -/* FIXME: SSE-optimize this. 
*/ -static void copy_sb16_16(uint16_t *dst, int dstride, const uint16_t *src, - int src_voffset, int src_hoffset, int sstride, - int vsize, int hsize) { - int r, c; - const uint16_t *base = &src[src_voffset * sstride + src_hoffset]; - for (r = 0; r < vsize; r++) { - for (c = 0; c < hsize; c++) { - dst[r * dstride + c] = base[r * sstride + c]; - } - } -} - -static INLINE uint64_t dist_8x8_16bit(uint16_t *dst, int dstride, uint16_t *src, - int sstride, int coeff_shift) { - uint64_t svar = 0; - uint64_t dvar = 0; - uint64_t sum_s = 0; - uint64_t sum_d = 0; - uint64_t sum_s2 = 0; - uint64_t sum_d2 = 0; - uint64_t sum_sd = 0; - int i, j; - for (i = 0; i < 8; i++) { - for (j = 0; j < 8; j++) { - sum_s += src[i * sstride + j]; - sum_d += dst[i * dstride + j]; - sum_s2 += src[i * sstride + j] * src[i * sstride + j]; - sum_d2 += dst[i * dstride + j] * dst[i * dstride + j]; - sum_sd += src[i * sstride + j] * dst[i * dstride + j]; - } - } - /* Compute the variance -- the calculation cannot go negative. */ - svar = sum_s2 - ((sum_s * sum_s + 32) >> 6); - dvar = sum_d2 - ((sum_d * sum_d + 32) >> 6); - return (uint64_t)floor( - .5 + (sum_d2 + sum_s2 - 2 * sum_sd) * .5 * - (svar + dvar + (400 << 2 * coeff_shift)) / - (sqrt((20000 << 4 * coeff_shift) + svar * (double)dvar))); -} - -static INLINE uint64_t mse_8x8_16bit(uint16_t *dst, int dstride, uint16_t *src, - int sstride) { - uint64_t sum = 0; - int i, j; - for (i = 0; i < 8; i++) { - for (j = 0; j < 8; j++) { - int e = dst[i * dstride + j] - src[i * sstride + j]; - sum += e * e; - } - } - return sum; -} - -static INLINE uint64_t mse_4x4_16bit(uint16_t *dst, int dstride, uint16_t *src, - int sstride) { - uint64_t sum = 0; - int i, j; - for (i = 0; i < 4; i++) { - for (j = 0; j < 4; j++) { - int e = dst[i * dstride + j] - src[i * sstride + j]; - sum += e * e; - } - } - return sum; -} - -/* Compute MSE only on the blocks we filtered. 
*/ -uint64_t compute_cdef_dist(uint16_t *dst, int dstride, uint16_t *src, - cdef_list *dlist, int cdef_count, BLOCK_SIZE bsize, - int coeff_shift, int pli) { - uint64_t sum = 0; - int bi, bx, by; - if (bsize == BLOCK_8X8) { - for (bi = 0; bi < cdef_count; bi++) { - by = dlist[bi].by; - bx = dlist[bi].bx; - if (pli == 0) { - sum += dist_8x8_16bit(&dst[(by << 3) * dstride + (bx << 3)], dstride, - &src[bi << (3 + 3)], 8, coeff_shift); - } else { - sum += mse_8x8_16bit(&dst[(by << 3) * dstride + (bx << 3)], dstride, - &src[bi << (3 + 3)], 8); - } - } - } else if (bsize == BLOCK_4X8) { - for (bi = 0; bi < cdef_count; bi++) { - by = dlist[bi].by; - bx = dlist[bi].bx; - sum += mse_4x4_16bit(&dst[(by << 3) * dstride + (bx << 2)], dstride, - &src[bi << (3 + 2)], 4); - sum += mse_4x4_16bit(&dst[((by << 3) + 4) * dstride + (bx << 2)], dstride, - &src[(bi << (3 + 2)) + 4 * 4], 4); - } - } else if (bsize == BLOCK_8X4) { - for (bi = 0; bi < cdef_count; bi++) { - by = dlist[bi].by; - bx = dlist[bi].bx; - sum += mse_4x4_16bit(&dst[(by << 2) * dstride + (bx << 3)], dstride, - &src[bi << (2 + 3)], 8); - sum += mse_4x4_16bit(&dst[(by << 2) * dstride + (bx << 3) + 4], dstride, - &src[(bi << (2 + 3)) + 4], 8); - } - } else { - assert(bsize == BLOCK_4X4); - for (bi = 0; bi < cdef_count; bi++) { - by = dlist[bi].by; - bx = dlist[bi].bx; - sum += mse_4x4_16bit(&dst[(by << 2) * dstride + (bx << 2)], dstride, - &src[bi << (2 + 2)], 4); - } - } - return sum >> 2 * coeff_shift; -} - -void av1_cdef_search(YV12_BUFFER_CONFIG *frame, const YV12_BUFFER_CONFIG *ref, - AV1_COMMON *cm, MACROBLOCKD *xd, int fast) { - int r, c; - int fbr, fbc; - uint16_t *src[3]; - uint16_t *ref_coeff[3]; - static cdef_list dlist[MI_SIZE_128X128 * MI_SIZE_128X128]; - int dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } }; - int var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } }; - int stride[3]; - int bsize[3]; - int mi_wide_l2[3]; - int mi_high_l2[3]; - int xdec[3]; - int ydec[3]; - int pli; - int cdef_count; - int coeff_shift = 
AOMMAX(cm->seq_params.bit_depth - 8, 0); - uint64_t best_tot_mse = (uint64_t)1 << 63; - uint64_t tot_mse; - int sb_count; - int nvfb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; - int nhfb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64; - int *sb_index = aom_malloc(nvfb * nhfb * sizeof(*sb_index)); - int *selected_strength = aom_malloc(nvfb * nhfb * sizeof(*sb_index)); - uint64_t(*mse[2])[TOTAL_STRENGTHS]; - int pri_damping = 3 + (cm->base_qindex >> 6); - int sec_damping = 3 + (cm->base_qindex >> 6); - int i; - int nb_strengths; - int nb_strength_bits; - int quantizer; - double lambda; - const int num_planes = av1_num_planes(cm); - const int total_strengths = fast ? REDUCED_TOTAL_STRENGTHS : TOTAL_STRENGTHS; - DECLARE_ALIGNED(32, uint16_t, inbuf[CDEF_INBUF_SIZE]); - uint16_t *in; - DECLARE_ALIGNED(32, uint16_t, tmp_dst[1 << (MAX_SB_SIZE_LOG2 * 2)]); - quantizer = av1_ac_quant_Q3(cm->base_qindex, 0, cm->seq_params.bit_depth) >> - (cm->seq_params.bit_depth - 8); - lambda = .12 * quantizer * quantizer / 256.; - - av1_setup_dst_planes(xd->plane, cm->seq_params.sb_size, frame, 0, 0, 0, - num_planes); - mse[0] = aom_malloc(sizeof(**mse) * nvfb * nhfb); - mse[1] = aom_malloc(sizeof(**mse) * nvfb * nhfb); - for (pli = 0; pli < num_planes; pli++) { - uint8_t *ref_buffer; - int ref_stride; - switch (pli) { - case 0: - ref_buffer = ref->y_buffer; - ref_stride = ref->y_stride; - break; - case 1: - ref_buffer = ref->u_buffer; - ref_stride = ref->uv_stride; - break; - case 2: - ref_buffer = ref->v_buffer; - ref_stride = ref->uv_stride; - break; - } - src[pli] = aom_memalign( - 32, sizeof(*src) * cm->mi_rows * cm->mi_cols * MI_SIZE * MI_SIZE); - ref_coeff[pli] = aom_memalign( - 32, sizeof(*ref_coeff) * cm->mi_rows * cm->mi_cols * MI_SIZE * MI_SIZE); - xdec[pli] = xd->plane[pli].subsampling_x; - ydec[pli] = xd->plane[pli].subsampling_y; - bsize[pli] = ydec[pli] ? (xdec[pli] ? BLOCK_4X4 : BLOCK_8X4) - : (xdec[pli] ? 
BLOCK_4X8 : BLOCK_8X8); - stride[pli] = cm->mi_cols << MI_SIZE_LOG2; - mi_wide_l2[pli] = MI_SIZE_LOG2 - xd->plane[pli].subsampling_x; - mi_high_l2[pli] = MI_SIZE_LOG2 - xd->plane[pli].subsampling_y; - - const int frame_height = - (cm->mi_rows * MI_SIZE) >> xd->plane[pli].subsampling_y; - const int frame_width = - (cm->mi_cols * MI_SIZE) >> xd->plane[pli].subsampling_x; - - for (r = 0; r < frame_height; ++r) { - for (c = 0; c < frame_width; ++c) { - if (cm->seq_params.use_highbitdepth) { - src[pli][r * stride[pli] + c] = CONVERT_TO_SHORTPTR( - xd->plane[pli].dst.buf)[r * xd->plane[pli].dst.stride + c]; - ref_coeff[pli][r * stride[pli] + c] = - CONVERT_TO_SHORTPTR(ref_buffer)[r * ref_stride + c]; - } else { - src[pli][r * stride[pli] + c] = - xd->plane[pli].dst.buf[r * xd->plane[pli].dst.stride + c]; - ref_coeff[pli][r * stride[pli] + c] = ref_buffer[r * ref_stride + c]; - } - } - } - } - in = inbuf + CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER; - sb_count = 0; - for (fbr = 0; fbr < nvfb; ++fbr) { - for (fbc = 0; fbc < nhfb; ++fbc) { - int nvb, nhb; - int gi; - int dirinit = 0; - nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * fbc); - nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * fbr); - int hb_step = 1; - int vb_step = 1; - BLOCK_SIZE bs = BLOCK_64X64; - MB_MODE_INFO *const mbmi = - cm->mi_grid_visible[MI_SIZE_64X64 * fbr * cm->mi_stride + - MI_SIZE_64X64 * fbc]; - if (((fbc & 1) && - (mbmi->sb_type == BLOCK_128X128 || mbmi->sb_type == BLOCK_128X64)) || - ((fbr & 1) && - (mbmi->sb_type == BLOCK_128X128 || mbmi->sb_type == BLOCK_64X128))) - continue; - if (mbmi->sb_type == BLOCK_128X128 || mbmi->sb_type == BLOCK_128X64 || - mbmi->sb_type == BLOCK_64X128) - bs = mbmi->sb_type; - if (bs == BLOCK_128X128 || bs == BLOCK_128X64) { - nhb = AOMMIN(MI_SIZE_128X128, cm->mi_cols - MI_SIZE_64X64 * fbc); - hb_step = 2; - } - if (bs == BLOCK_128X128 || bs == BLOCK_64X128) { - nvb = AOMMIN(MI_SIZE_128X128, cm->mi_rows - MI_SIZE_64X64 * fbr); - vb_step = 2; - } 
- // No filtering if the entire filter block is skipped - if (sb_all_skip(cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64)) continue; - cdef_count = sb_compute_cdef_list(cm, fbr * MI_SIZE_64X64, - fbc * MI_SIZE_64X64, dlist, bs); - for (pli = 0; pli < num_planes; pli++) { - for (i = 0; i < CDEF_INBUF_SIZE; i++) inbuf[i] = CDEF_VERY_LARGE; - for (gi = 0; gi < total_strengths; gi++) { - int threshold; - uint64_t curr_mse; - int sec_strength; - threshold = gi / CDEF_SEC_STRENGTHS; - if (fast) threshold = priconv[threshold]; - /* We avoid filtering the pixels for which some of the pixels to - average - are outside the frame. We could change the filter instead, but it - would add special cases for any future vectorization. */ - int yoff = CDEF_VBORDER * (fbr != 0); - int xoff = CDEF_HBORDER * (fbc != 0); - int ysize = (nvb << mi_high_l2[pli]) + - CDEF_VBORDER * (fbr + vb_step < nvfb) + yoff; - int xsize = (nhb << mi_wide_l2[pli]) + - CDEF_HBORDER * (fbc + hb_step < nhfb) + xoff; - sec_strength = gi % CDEF_SEC_STRENGTHS; - copy_sb16_16(&in[(-yoff * CDEF_BSTRIDE - xoff)], CDEF_BSTRIDE, - src[pli], - (fbr * MI_SIZE_64X64 << mi_high_l2[pli]) - yoff, - (fbc * MI_SIZE_64X64 << mi_wide_l2[pli]) - xoff, - stride[pli], ysize, xsize); - cdef_filter_fb(NULL, tmp_dst, CDEF_BSTRIDE, in, xdec[pli], ydec[pli], - dir, &dirinit, var, pli, dlist, cdef_count, threshold, - sec_strength + (sec_strength == 3), pri_damping, - sec_damping, coeff_shift); - curr_mse = compute_cdef_dist( - ref_coeff[pli] + - (fbr * MI_SIZE_64X64 << mi_high_l2[pli]) * stride[pli] + - (fbc * MI_SIZE_64X64 << mi_wide_l2[pli]), - stride[pli], tmp_dst, dlist, cdef_count, bsize[pli], coeff_shift, - pli); - if (pli < 2) - mse[pli][sb_count][gi] = curr_mse; - else - mse[1][sb_count][gi] += curr_mse; - sb_index[sb_count] = - MI_SIZE_64X64 * fbr * cm->mi_stride + MI_SIZE_64X64 * fbc; - } - } - sb_count++; - } - } - nb_strength_bits = 0; - /* Search for different number of signalling bits. 
*/ - for (i = 0; i <= 3; i++) { - int j; - int best_lev0[CDEF_MAX_STRENGTHS]; - int best_lev1[CDEF_MAX_STRENGTHS] = { 0 }; - nb_strengths = 1 << i; - if (num_planes >= 3) - tot_mse = joint_strength_search_dual(best_lev0, best_lev1, nb_strengths, - mse, sb_count, fast); - else - tot_mse = joint_strength_search(best_lev0, nb_strengths, mse[0], sb_count, - fast); - /* Count superblock signalling cost. */ - tot_mse += (uint64_t)(sb_count * lambda * i); - /* Count header signalling cost. */ - tot_mse += (uint64_t)(nb_strengths * lambda * CDEF_STRENGTH_BITS); - if (tot_mse < best_tot_mse) { - best_tot_mse = tot_mse; - nb_strength_bits = i; - for (j = 0; j < 1 << nb_strength_bits; j++) { - cm->cdef_strengths[j] = best_lev0[j]; - cm->cdef_uv_strengths[j] = best_lev1[j]; - } - } - } - nb_strengths = 1 << nb_strength_bits; - - cm->cdef_bits = nb_strength_bits; - cm->nb_cdef_strengths = nb_strengths; - for (i = 0; i < sb_count; i++) { - int gi; - int best_gi; - uint64_t best_mse = (uint64_t)1 << 63; - best_gi = 0; - for (gi = 0; gi < cm->nb_cdef_strengths; gi++) { - uint64_t curr = mse[0][i][cm->cdef_strengths[gi]]; - if (num_planes >= 3) curr += mse[1][i][cm->cdef_uv_strengths[gi]]; - if (curr < best_mse) { - best_gi = gi; - best_mse = curr; - } - } - selected_strength[i] = best_gi; - cm->mi_grid_visible[sb_index[i]]->cdef_strength = best_gi; - } - - if (fast) { - for (int j = 0; j < nb_strengths; j++) { - cm->cdef_strengths[j] = - priconv[cm->cdef_strengths[j] / CDEF_SEC_STRENGTHS] * - CDEF_SEC_STRENGTHS + - (cm->cdef_strengths[j] % CDEF_SEC_STRENGTHS); - cm->cdef_uv_strengths[j] = - priconv[cm->cdef_uv_strengths[j] / CDEF_SEC_STRENGTHS] * - CDEF_SEC_STRENGTHS + - (cm->cdef_uv_strengths[j] % CDEF_SEC_STRENGTHS); - } - } - cm->cdef_pri_damping = pri_damping; - cm->cdef_sec_damping = sec_damping; - aom_free(mse[0]); - aom_free(mse[1]); - for (pli = 0; pli < num_planes; pli++) { - aom_free(src[pli]); - aom_free(ref_coeff[pli]); - } - aom_free(sb_index); - 
aom_free(selected_strength); -} diff --git a/third_party/aom/av1/encoder/picklpf.c b/third_party/aom/av1/encoder/picklpf.c deleted file mode 100644 index c5508e25c..000000000 --- a/third_party/aom/av1/encoder/picklpf.c +++ /dev/null @@ -1,263 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include -#include - -#include "config/aom_scale_rtcd.h" - -#include "aom_dsp/aom_dsp_common.h" -#include "aom_dsp/psnr.h" -#include "aom_mem/aom_mem.h" -#include "aom_ports/mem.h" - -#include "av1/common/av1_loopfilter.h" -#include "av1/common/onyxc_int.h" -#include "av1/common/quant_common.h" - -#include "av1/encoder/av1_quantize.h" -#include "av1/encoder/encoder.h" -#include "av1/encoder/picklpf.h" - -static void yv12_copy_plane(const YV12_BUFFER_CONFIG *src_bc, - YV12_BUFFER_CONFIG *dst_bc, int plane) { - switch (plane) { - case 0: aom_yv12_copy_y(src_bc, dst_bc); break; - case 1: aom_yv12_copy_u(src_bc, dst_bc); break; - case 2: aom_yv12_copy_v(src_bc, dst_bc); break; - default: assert(plane >= 0 && plane <= 2); break; - } -} - -int av1_get_max_filter_level(const AV1_COMP *cpi) { - if (cpi->oxcf.pass == 2) { - return cpi->twopass.section_intra_rating > 8 ? 
MAX_LOOP_FILTER * 3 / 4 - : MAX_LOOP_FILTER; - } else { - return MAX_LOOP_FILTER; - } -} - -static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd, - AV1_COMP *const cpi, int filt_level, - int partial_frame, int plane, int dir) { - AV1_COMMON *const cm = &cpi->common; - int64_t filt_err; - - assert(plane >= 0 && plane <= 2); - int filter_level[2] = { filt_level, filt_level }; - if (plane == 0 && dir == 0) filter_level[1] = cm->lf.filter_level[1]; - if (plane == 0 && dir == 1) filter_level[0] = cm->lf.filter_level[0]; - - // set base filters for use of get_filter_level when in DELTA_Q_LF mode - switch (plane) { - case 0: - cm->lf.filter_level[0] = filter_level[0]; - cm->lf.filter_level[1] = filter_level[1]; - break; - case 1: cm->lf.filter_level_u = filter_level[0]; break; - case 2: cm->lf.filter_level_v = filter_level[0]; break; - } - - // TODO(any): please enable multi-thread and remove the flag when loop - // filter mask is compatible with multi-thread. -#if LOOP_FILTER_BITMASK - av1_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, 0, plane, - plane + 1, partial_frame); -#else - if (cpi->num_workers > 1) - av1_loop_filter_frame_mt(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, plane, - plane + 1, partial_frame, cpi->workers, - cpi->num_workers, &cpi->lf_row_sync); - else - av1_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, plane, - plane + 1, partial_frame); -#endif - - filt_err = aom_get_sse_plane(sd, cm->frame_to_show, plane, - cm->seq_params.use_highbitdepth); - - // Re-instate the unfiltered frame - yv12_copy_plane(&cpi->last_frame_uf, cm->frame_to_show, plane); - - return filt_err; -} - -static int search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi, - int partial_frame, - const int *last_frame_filter_level, - double *best_cost_ret, int plane, int dir) { - const AV1_COMMON *const cm = &cpi->common; - const int min_filter_level = 0; - const int max_filter_level = av1_get_max_filter_level(cpi); - int filt_direction = 0; - 
int64_t best_err; - int filt_best; - MACROBLOCK *x = &cpi->td.mb; - - // Start the search at the previous frame filter level unless it is now out of - // range. - int lvl; - switch (plane) { - case 0: lvl = last_frame_filter_level[dir]; break; - case 1: lvl = last_frame_filter_level[2]; break; - case 2: lvl = last_frame_filter_level[3]; break; - default: assert(plane >= 0 && plane <= 2); return 0; - } - int filt_mid = clamp(lvl, min_filter_level, max_filter_level); - int filter_step = filt_mid < 16 ? 4 : filt_mid / 4; - // Sum squared error at each filter level - int64_t ss_err[MAX_LOOP_FILTER + 1]; - - // Set each entry to -1 - memset(ss_err, 0xFF, sizeof(ss_err)); - yv12_copy_plane(cm->frame_to_show, &cpi->last_frame_uf, plane); - best_err = try_filter_frame(sd, cpi, filt_mid, partial_frame, plane, dir); - filt_best = filt_mid; - ss_err[filt_mid] = best_err; - - while (filter_step > 0) { - const int filt_high = AOMMIN(filt_mid + filter_step, max_filter_level); - const int filt_low = AOMMAX(filt_mid - filter_step, min_filter_level); - - // Bias against raising loop filter in favor of lowering it. - int64_t bias = (best_err >> (15 - (filt_mid / 8))) * filter_step; - - if ((cpi->oxcf.pass == 2) && (cpi->twopass.section_intra_rating < 20)) - bias = (bias * cpi->twopass.section_intra_rating) / 20; - - // yx, bias less for large block size - if (cm->tx_mode != ONLY_4X4) bias >>= 1; - - if (filt_direction <= 0 && filt_low != filt_mid) { - // Get Low filter error score - if (ss_err[filt_low] < 0) { - ss_err[filt_low] = - try_filter_frame(sd, cpi, filt_low, partial_frame, plane, dir); - } - // If value is close to the best so far then bias towards a lower loop - // filter value. - if (ss_err[filt_low] < (best_err + bias)) { - // Was it actually better than the previous best? 
- if (ss_err[filt_low] < best_err) { - best_err = ss_err[filt_low]; - } - filt_best = filt_low; - } - } - - // Now look at filt_high - if (filt_direction >= 0 && filt_high != filt_mid) { - if (ss_err[filt_high] < 0) { - ss_err[filt_high] = - try_filter_frame(sd, cpi, filt_high, partial_frame, plane, dir); - } - // If value is significantly better than previous best, bias added against - // raising filter value - if (ss_err[filt_high] < (best_err - bias)) { - best_err = ss_err[filt_high]; - filt_best = filt_high; - } - } - - // Half the step distance if the best filter value was the same as last time - if (filt_best == filt_mid) { - filter_step /= 2; - filt_direction = 0; - } else { - filt_direction = (filt_best < filt_mid) ? -1 : 1; - filt_mid = filt_best; - } - } - - // Update best error - best_err = ss_err[filt_best]; - - if (best_cost_ret) *best_cost_ret = RDCOST_DBL(x->rdmult, 0, best_err); - return filt_best; -} - -void av1_pick_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi, - LPF_PICK_METHOD method) { - AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - struct loopfilter *const lf = &cm->lf; - (void)sd; - - lf->sharpness_level = 0; - cpi->td.mb.rdmult = cpi->rd.RDMULT; - - if (method == LPF_PICK_MINIMAL_LPF) { - lf->filter_level[0] = 0; - lf->filter_level[1] = 0; - } else if (method >= LPF_PICK_FROM_Q) { - const int min_filter_level = 0; - const int max_filter_level = av1_get_max_filter_level(cpi); - const int q = av1_ac_quant_Q3(cm->base_qindex, 0, cm->seq_params.bit_depth); - // These values were determined by linear fitting the result of the - // searched level for 8 bit depth: - // Keyframes: filt_guess = q * 0.06699 - 1.60817 - // Other frames: filt_guess = q * 0.02295 + 2.48225 - // - // And high bit depth separately: - // filt_guess = q * 0.316206 + 3.87252 - int filt_guess; - switch (cm->seq_params.bit_depth) { - case AOM_BITS_8: - filt_guess = (cm->frame_type == KEY_FRAME) - ? 
ROUND_POWER_OF_TWO(q * 17563 - 421574, 18) - : ROUND_POWER_OF_TWO(q * 6017 + 650707, 18); - break; - case AOM_BITS_10: - filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 4060632, 20); - break; - case AOM_BITS_12: - filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 16242526, 22); - break; - default: - assert(0 && - "bit_depth should be AOM_BITS_8, AOM_BITS_10 " - "or AOM_BITS_12"); - return; - } - if (cm->seq_params.bit_depth != AOM_BITS_8 && cm->frame_type == KEY_FRAME) - filt_guess -= 4; - // TODO(chengchen): retrain the model for Y, U, V filter levels - lf->filter_level[0] = clamp(filt_guess, min_filter_level, max_filter_level); - lf->filter_level[1] = clamp(filt_guess, min_filter_level, max_filter_level); - lf->filter_level_u = clamp(filt_guess, min_filter_level, max_filter_level); - lf->filter_level_v = clamp(filt_guess, min_filter_level, max_filter_level); - } else { - const int last_frame_filter_level[4] = { lf->filter_level[0], - lf->filter_level[1], - lf->filter_level_u, - lf->filter_level_v }; - - lf->filter_level[0] = lf->filter_level[1] = - search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, - last_frame_filter_level, NULL, 0, 2); - lf->filter_level[0] = - search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, - last_frame_filter_level, NULL, 0, 0); - lf->filter_level[1] = - search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, - last_frame_filter_level, NULL, 0, 1); - - if (num_planes > 1) { - lf->filter_level_u = - search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, - last_frame_filter_level, NULL, 1, 0); - lf->filter_level_v = - search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE, - last_frame_filter_level, NULL, 2, 0); - } - } -} diff --git a/third_party/aom/av1/encoder/picklpf.h b/third_party/aom/av1/encoder/picklpf.h deleted file mode 100644 index 357097ae1..000000000 --- a/third_party/aom/av1/encoder/picklpf.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. 
All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_ENCODER_PICKLPF_H_ -#define AOM_AV1_ENCODER_PICKLPF_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "av1/encoder/encoder.h" - -struct yv12_buffer_config; -struct AV1_COMP; -int av1_get_max_filter_level(const AV1_COMP *cpi); -void av1_pick_filter_level(const struct yv12_buffer_config *sd, - struct AV1_COMP *cpi, LPF_PICK_METHOD method); -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_PICKLPF_H_ diff --git a/third_party/aom/av1/encoder/pickrst.c b/third_party/aom/av1/encoder/pickrst.c deleted file mode 100644 index e7804f6b4..000000000 --- a/third_party/aom/av1/encoder/pickrst.c +++ /dev/null @@ -1,1362 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include -#include -#include -#include - -#include "config/aom_scale_rtcd.h" -#include "config/av1_rtcd.h" - -#include "aom_dsp/aom_dsp_common.h" -#include "aom_dsp/binary_codes_writer.h" -#include "aom_dsp/psnr.h" -#include "aom_mem/aom_mem.h" -#include "aom_ports/mem.h" -#include "aom_ports/system_state.h" -#include "av1/common/onyxc_int.h" -#include "av1/common/quant_common.h" -#include "av1/common/restoration.h" - -#include "av1/encoder/av1_quantize.h" -#include "av1/encoder/encoder.h" -#include "av1/encoder/mathutils.h" -#include "av1/encoder/picklpf.h" -#include "av1/encoder/pickrst.h" - -// When set to RESTORE_WIENER or RESTORE_SGRPROJ only those are allowed. -// When set to RESTORE_TYPES we allow switchable. -static const RestorationType force_restore_type = RESTORE_TYPES; - -// Number of Wiener iterations -#define NUM_WIENER_ITERS 5 - -// Penalty factor for use of dual sgr -#define DUAL_SGR_PENALTY_MULT 0.01 - -const int frame_level_restore_bits[RESTORE_TYPES] = { 2, 2, 2, 2 }; - -typedef int64_t (*sse_extractor_type)(const YV12_BUFFER_CONFIG *a, - const YV12_BUFFER_CONFIG *b); -typedef int64_t (*sse_part_extractor_type)(const YV12_BUFFER_CONFIG *a, - const YV12_BUFFER_CONFIG *b, - int hstart, int width, int vstart, - int height); - -#define NUM_EXTRACTORS (3 * (1 + 1)) - -static const sse_part_extractor_type sse_part_extractors[NUM_EXTRACTORS] = { - aom_get_y_sse_part, aom_get_u_sse_part, - aom_get_v_sse_part, aom_highbd_get_y_sse_part, - aom_highbd_get_u_sse_part, aom_highbd_get_v_sse_part, -}; - -static int64_t sse_restoration_unit(const RestorationTileLimits *limits, - const YV12_BUFFER_CONFIG *src, - const YV12_BUFFER_CONFIG *dst, int plane, - int highbd) { - return sse_part_extractors[3 * highbd + plane]( - src, dst, limits->h_start, limits->h_end - limits->h_start, - limits->v_start, limits->v_end - limits->v_start); -} - -typedef struct { - // The best coefficients for Wiener or Sgrproj restoration - WienerInfo wiener; - SgrprojInfo 
sgrproj; - - // The sum of squared errors for this rtype. - int64_t sse[RESTORE_SWITCHABLE_TYPES]; - - // The rtype to use for this unit given a frame rtype as - // index. Indices: WIENER, SGRPROJ, SWITCHABLE. - RestorationType best_rtype[RESTORE_TYPES - 1]; -} RestUnitSearchInfo; - -typedef struct { - const YV12_BUFFER_CONFIG *src; - YV12_BUFFER_CONFIG *dst; - - const AV1_COMMON *cm; - const MACROBLOCK *x; - int plane; - int plane_width; - int plane_height; - RestUnitSearchInfo *rusi; - - // Speed features - const SPEED_FEATURES *sf; - - uint8_t *dgd_buffer; - int dgd_stride; - const uint8_t *src_buffer; - int src_stride; - - // sse and bits are initialised by reset_rsc in search_rest_type - int64_t sse; - int64_t bits; - int tile_y0, tile_stripe0; - - // sgrproj and wiener are initialised by rsc_on_tile when starting the first - // tile in the frame. - SgrprojInfo sgrproj; - WienerInfo wiener; - AV1PixelRect tile_rect; -} RestSearchCtxt; - -static void rsc_on_tile(int tile_row, int tile_col, void *priv) { - (void)tile_col; - - RestSearchCtxt *rsc = (RestSearchCtxt *)priv; - set_default_sgrproj(&rsc->sgrproj); - set_default_wiener(&rsc->wiener); - - rsc->tile_stripe0 = - (tile_row == 0) ? 
0 : rsc->cm->rst_end_stripe[tile_row - 1]; -} - -static void reset_rsc(RestSearchCtxt *rsc) { - rsc->sse = 0; - rsc->bits = 0; -} - -static void init_rsc(const YV12_BUFFER_CONFIG *src, const AV1_COMMON *cm, - const MACROBLOCK *x, const SPEED_FEATURES *sf, int plane, - RestUnitSearchInfo *rusi, YV12_BUFFER_CONFIG *dst, - RestSearchCtxt *rsc) { - rsc->src = src; - rsc->dst = dst; - rsc->cm = cm; - rsc->x = x; - rsc->plane = plane; - rsc->rusi = rusi; - rsc->sf = sf; - - const YV12_BUFFER_CONFIG *dgd = cm->frame_to_show; - const int is_uv = plane != AOM_PLANE_Y; - rsc->plane_width = src->crop_widths[is_uv]; - rsc->plane_height = src->crop_heights[is_uv]; - rsc->src_buffer = src->buffers[plane]; - rsc->src_stride = src->strides[is_uv]; - rsc->dgd_buffer = dgd->buffers[plane]; - rsc->dgd_stride = dgd->strides[is_uv]; - rsc->tile_rect = av1_whole_frame_rect(cm, is_uv); - assert(src->crop_widths[is_uv] == dgd->crop_widths[is_uv]); - assert(src->crop_heights[is_uv] == dgd->crop_heights[is_uv]); -} - -static int64_t try_restoration_unit(const RestSearchCtxt *rsc, - const RestorationTileLimits *limits, - const AV1PixelRect *tile_rect, - const RestorationUnitInfo *rui) { - const AV1_COMMON *const cm = rsc->cm; - const int plane = rsc->plane; - const int is_uv = plane > 0; - const RestorationInfo *rsi = &cm->rst_info[plane]; - RestorationLineBuffers rlbs; - const int bit_depth = cm->seq_params.bit_depth; - const int highbd = cm->seq_params.use_highbitdepth; - - const YV12_BUFFER_CONFIG *fts = cm->frame_to_show; - // TODO(yunqing): For now, only use optimized LR filter in decoder. Can be - // also used in encoder. 
- const int optimized_lr = 0; - - av1_loop_restoration_filter_unit( - limits, rui, &rsi->boundaries, &rlbs, tile_rect, rsc->tile_stripe0, - is_uv && cm->seq_params.subsampling_x, - is_uv && cm->seq_params.subsampling_y, highbd, bit_depth, - fts->buffers[plane], fts->strides[is_uv], rsc->dst->buffers[plane], - rsc->dst->strides[is_uv], cm->rst_tmpbuf, optimized_lr); - - return sse_restoration_unit(limits, rsc->src, rsc->dst, plane, highbd); -} - -int64_t av1_lowbd_pixel_proj_error_c(const uint8_t *src8, int width, int height, - int src_stride, const uint8_t *dat8, - int dat_stride, int32_t *flt0, - int flt0_stride, int32_t *flt1, - int flt1_stride, int xq[2], - const sgr_params_type *params) { - int i, j; - const uint8_t *src = src8; - const uint8_t *dat = dat8; - int64_t err = 0; - if (params->r[0] > 0 && params->r[1] > 0) { - for (i = 0; i < height; ++i) { - for (j = 0; j < width; ++j) { - assert(flt1[j] < (1 << 15) && flt1[j] > -(1 << 15)); - assert(flt0[j] < (1 << 15) && flt0[j] > -(1 << 15)); - const int32_t u = (int32_t)(dat[j] << SGRPROJ_RST_BITS); - int32_t v = u << SGRPROJ_PRJ_BITS; - v += xq[0] * (flt0[j] - u) + xq[1] * (flt1[j] - u); - const int32_t e = - ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) - src[j]; - err += e * e; - } - dat += dat_stride; - src += src_stride; - flt0 += flt0_stride; - flt1 += flt1_stride; - } - } else if (params->r[0] > 0) { - for (i = 0; i < height; ++i) { - for (j = 0; j < width; ++j) { - assert(flt0[j] < (1 << 15) && flt0[j] > -(1 << 15)); - const int32_t u = (int32_t)(dat[j] << SGRPROJ_RST_BITS); - int32_t v = u << SGRPROJ_PRJ_BITS; - v += xq[0] * (flt0[j] - u); - const int32_t e = - ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) - src[j]; - err += e * e; - } - dat += dat_stride; - src += src_stride; - flt0 += flt0_stride; - } - } else if (params->r[1] > 0) { - for (i = 0; i < height; ++i) { - for (j = 0; j < width; ++j) { - assert(flt1[j] < (1 << 15) && flt1[j] > -(1 << 15)); - const int32_t u = 
(int32_t)(dat[j] << SGRPROJ_RST_BITS); - int32_t v = u << SGRPROJ_PRJ_BITS; - v += xq[1] * (flt1[j] - u); - const int32_t e = - ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) - src[j]; - err += e * e; - } - dat += dat_stride; - src += src_stride; - flt1 += flt1_stride; - } - } else { - for (i = 0; i < height; ++i) { - for (j = 0; j < width; ++j) { - const int32_t e = (int32_t)(dat[j]) - src[j]; - err += e * e; - } - dat += dat_stride; - src += src_stride; - } - } - - return err; -} - -static int64_t get_pixel_proj_error(const uint8_t *src8, int width, int height, - int src_stride, const uint8_t *dat8, - int dat_stride, int use_highbitdepth, - int32_t *flt0, int flt0_stride, - int32_t *flt1, int flt1_stride, int *xqd, - const sgr_params_type *params) { - int i, j; - int64_t err = 0; - int xq[2]; - decode_xq(xqd, xq, params); - if (!use_highbitdepth) { - err = av1_lowbd_pixel_proj_error(src8, width, height, src_stride, dat8, - dat_stride, flt0, flt0_stride, flt1, - flt1_stride, xq, params); - } else { - const uint16_t *src = CONVERT_TO_SHORTPTR(src8); - const uint16_t *dat = CONVERT_TO_SHORTPTR(dat8); - const int32_t half = 1 << (SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS - 1); - if (params->r[0] > 0 && params->r[1] > 0) { - int xq0 = xq[0]; - int xq1 = xq[1]; - for (i = 0; i < height; ++i) { - for (j = 0; j < width; ++j) { - const int32_t d = dat[j]; - const int32_t s = src[j]; - const int32_t u = (int32_t)(d << SGRPROJ_RST_BITS); - int32_t v0 = flt0[j] - u; - int32_t v1 = flt1[j] - u; - int32_t v = half; - v += xq0 * v0; - v += xq1 * v1; - const int32_t e = - (v >> (SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS)) + d - s; - err += e * e; - } - dat += dat_stride; - flt0 += flt0_stride; - flt1 += flt1_stride; - src += src_stride; - } - } else if (params->r[0] > 0 || params->r[1] > 0) { - int exq; - int32_t *flt; - int flt_stride; - if (params->r[0] > 0) { - exq = xq[0]; - flt = flt0; - flt_stride = flt0_stride; - } else { - exq = xq[1]; - flt = flt1; - flt_stride = 
flt1_stride; - } - for (i = 0; i < height; ++i) { - for (j = 0; j < width; ++j) { - const int32_t d = dat[j]; - const int32_t s = src[j]; - const int32_t u = (int32_t)(d << SGRPROJ_RST_BITS); - int32_t v = half; - v += exq * (flt[j] - u); - const int32_t e = - (v >> (SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS)) + d - s; - err += e * e; - } - dat += dat_stride; - flt += flt_stride; - src += src_stride; - } - } else { - for (i = 0; i < height; ++i) { - for (j = 0; j < width; ++j) { - const int32_t d = dat[j]; - const int32_t s = src[j]; - const int32_t e = d - s; - err += e * e; - } - dat += dat_stride; - src += src_stride; - } - } - } - return err; -} - -#define USE_SGRPROJ_REFINEMENT_SEARCH 1 -static int64_t finer_search_pixel_proj_error( - const uint8_t *src8, int width, int height, int src_stride, - const uint8_t *dat8, int dat_stride, int use_highbitdepth, int32_t *flt0, - int flt0_stride, int32_t *flt1, int flt1_stride, int start_step, int *xqd, - const sgr_params_type *params) { - int64_t err = get_pixel_proj_error( - src8, width, height, src_stride, dat8, dat_stride, use_highbitdepth, flt0, - flt0_stride, flt1, flt1_stride, xqd, params); - (void)start_step; -#if USE_SGRPROJ_REFINEMENT_SEARCH - int64_t err2; - int tap_min[] = { SGRPROJ_PRJ_MIN0, SGRPROJ_PRJ_MIN1 }; - int tap_max[] = { SGRPROJ_PRJ_MAX0, SGRPROJ_PRJ_MAX1 }; - for (int s = start_step; s >= 1; s >>= 1) { - for (int p = 0; p < 2; ++p) { - if ((params->r[0] == 0 && p == 0) || (params->r[1] == 0 && p == 1)) { - continue; - } - int skip = 0; - do { - if (xqd[p] - s >= tap_min[p]) { - xqd[p] -= s; - err2 = - get_pixel_proj_error(src8, width, height, src_stride, dat8, - dat_stride, use_highbitdepth, flt0, - flt0_stride, flt1, flt1_stride, xqd, params); - if (err2 > err) { - xqd[p] += s; - } else { - err = err2; - skip = 1; - // At the highest step size continue moving in the same direction - if (s == start_step) continue; - } - } - break; - } while (1); - if (skip) break; - do { - if (xqd[p] + s <= tap_max[p]) 
{ - xqd[p] += s; - err2 = - get_pixel_proj_error(src8, width, height, src_stride, dat8, - dat_stride, use_highbitdepth, flt0, - flt0_stride, flt1, flt1_stride, xqd, params); - if (err2 > err) { - xqd[p] -= s; - } else { - err = err2; - // At the highest step size continue moving in the same direction - if (s == start_step) continue; - } - } - break; - } while (1); - } - } -#endif // USE_SGRPROJ_REFINEMENT_SEARCH - return err; -} - -static void get_proj_subspace(const uint8_t *src8, int width, int height, - int src_stride, const uint8_t *dat8, - int dat_stride, int use_highbitdepth, - int32_t *flt0, int flt0_stride, int32_t *flt1, - int flt1_stride, int *xq, - const sgr_params_type *params) { - int i, j; - double H[2][2] = { { 0, 0 }, { 0, 0 } }; - double C[2] = { 0, 0 }; - double Det; - double x[2]; - const int size = width * height; - - aom_clear_system_state(); - - // Default - xq[0] = 0; - xq[1] = 0; - if (!use_highbitdepth) { - const uint8_t *src = src8; - const uint8_t *dat = dat8; - for (i = 0; i < height; ++i) { - for (j = 0; j < width; ++j) { - const double u = (double)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS); - const double s = - (double)(src[i * src_stride + j] << SGRPROJ_RST_BITS) - u; - const double f1 = - (params->r[0] > 0) ? (double)flt0[i * flt0_stride + j] - u : 0; - const double f2 = - (params->r[1] > 0) ? (double)flt1[i * flt1_stride + j] - u : 0; - H[0][0] += f1 * f1; - H[1][1] += f2 * f2; - H[0][1] += f1 * f2; - C[0] += f1 * s; - C[1] += f2 * s; - } - } - } else { - const uint16_t *src = CONVERT_TO_SHORTPTR(src8); - const uint16_t *dat = CONVERT_TO_SHORTPTR(dat8); - for (i = 0; i < height; ++i) { - for (j = 0; j < width; ++j) { - const double u = (double)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS); - const double s = - (double)(src[i * src_stride + j] << SGRPROJ_RST_BITS) - u; - const double f1 = - (params->r[0] > 0) ? (double)flt0[i * flt0_stride + j] - u : 0; - const double f2 = - (params->r[1] > 0) ? 
(double)flt1[i * flt1_stride + j] - u : 0; - H[0][0] += f1 * f1; - H[1][1] += f2 * f2; - H[0][1] += f1 * f2; - C[0] += f1 * s; - C[1] += f2 * s; - } - } - } - H[0][0] /= size; - H[0][1] /= size; - H[1][1] /= size; - H[1][0] = H[0][1]; - C[0] /= size; - C[1] /= size; - if (params->r[0] == 0) { - // H matrix is now only the scalar H[1][1] - // C vector is now only the scalar C[1] - Det = H[1][1]; - if (Det < 1e-8) return; // ill-posed, return default values - x[0] = 0; - x[1] = C[1] / Det; - - xq[0] = 0; - xq[1] = (int)rint(x[1] * (1 << SGRPROJ_PRJ_BITS)); - } else if (params->r[1] == 0) { - // H matrix is now only the scalar H[0][0] - // C vector is now only the scalar C[0] - Det = H[0][0]; - if (Det < 1e-8) return; // ill-posed, return default values - x[0] = C[0] / Det; - x[1] = 0; - - xq[0] = (int)rint(x[0] * (1 << SGRPROJ_PRJ_BITS)); - xq[1] = 0; - } else { - Det = (H[0][0] * H[1][1] - H[0][1] * H[1][0]); - if (Det < 1e-8) return; // ill-posed, return default values - x[0] = (H[1][1] * C[0] - H[0][1] * C[1]) / Det; - x[1] = (H[0][0] * C[1] - H[1][0] * C[0]) / Det; - - xq[0] = (int)rint(x[0] * (1 << SGRPROJ_PRJ_BITS)); - xq[1] = (int)rint(x[1] * (1 << SGRPROJ_PRJ_BITS)); - } -} - -void encode_xq(int *xq, int *xqd, const sgr_params_type *params) { - if (params->r[0] == 0) { - xqd[0] = 0; - xqd[1] = clamp((1 << SGRPROJ_PRJ_BITS) - xq[1], SGRPROJ_PRJ_MIN1, - SGRPROJ_PRJ_MAX1); - } else if (params->r[1] == 0) { - xqd[0] = clamp(xq[0], SGRPROJ_PRJ_MIN0, SGRPROJ_PRJ_MAX0); - xqd[1] = clamp((1 << SGRPROJ_PRJ_BITS) - xqd[0], SGRPROJ_PRJ_MIN1, - SGRPROJ_PRJ_MAX1); - } else { - xqd[0] = clamp(xq[0], SGRPROJ_PRJ_MIN0, SGRPROJ_PRJ_MAX0); - xqd[1] = clamp((1 << SGRPROJ_PRJ_BITS) - xqd[0] - xq[1], SGRPROJ_PRJ_MIN1, - SGRPROJ_PRJ_MAX1); - } -} - -// Apply the self-guided filter across an entire restoration unit. 
-static void apply_sgr(int sgr_params_idx, const uint8_t *dat8, int width, - int height, int dat_stride, int use_highbd, int bit_depth, - int pu_width, int pu_height, int32_t *flt0, int32_t *flt1, - int flt_stride) { - for (int i = 0; i < height; i += pu_height) { - const int h = AOMMIN(pu_height, height - i); - int32_t *flt0_row = flt0 + i * flt_stride; - int32_t *flt1_row = flt1 + i * flt_stride; - const uint8_t *dat8_row = dat8 + i * dat_stride; - - // Iterate over the stripe in blocks of width pu_width - for (int j = 0; j < width; j += pu_width) { - const int w = AOMMIN(pu_width, width - j); - const int ret = av1_selfguided_restoration( - dat8_row + j, w, h, dat_stride, flt0_row + j, flt1_row + j, - flt_stride, sgr_params_idx, bit_depth, use_highbd); - (void)ret; - assert(!ret); - } - } -} - -static SgrprojInfo search_selfguided_restoration( - const uint8_t *dat8, int width, int height, int dat_stride, - const uint8_t *src8, int src_stride, int use_highbitdepth, int bit_depth, - int pu_width, int pu_height, int32_t *rstbuf) { - int32_t *flt0 = rstbuf; - int32_t *flt1 = flt0 + RESTORATION_UNITPELS_MAX; - int ep, bestep = 0; - int64_t besterr = -1; - int exqd[2], bestxqd[2] = { 0, 0 }; - int flt_stride = ((width + 7) & ~7) + 8; - assert(pu_width == (RESTORATION_PROC_UNIT_SIZE >> 1) || - pu_width == RESTORATION_PROC_UNIT_SIZE); - assert(pu_height == (RESTORATION_PROC_UNIT_SIZE >> 1) || - pu_height == RESTORATION_PROC_UNIT_SIZE); - - for (ep = 0; ep < SGRPROJ_PARAMS; ep++) { - int exq[2]; - apply_sgr(ep, dat8, width, height, dat_stride, use_highbitdepth, bit_depth, - pu_width, pu_height, flt0, flt1, flt_stride); - aom_clear_system_state(); - const sgr_params_type *const params = &sgr_params[ep]; - get_proj_subspace(src8, width, height, src_stride, dat8, dat_stride, - use_highbitdepth, flt0, flt_stride, flt1, flt_stride, exq, - params); - aom_clear_system_state(); - encode_xq(exq, exqd, params); - int64_t err = finer_search_pixel_proj_error( - src8, width, height, 
src_stride, dat8, dat_stride, use_highbitdepth, - flt0, flt_stride, flt1, flt_stride, 2, exqd, params); - if (besterr == -1 || err < besterr) { - bestep = ep; - besterr = err; - bestxqd[0] = exqd[0]; - bestxqd[1] = exqd[1]; - } - } - - SgrprojInfo ret; - ret.ep = bestep; - ret.xqd[0] = bestxqd[0]; - ret.xqd[1] = bestxqd[1]; - return ret; -} - -static int count_sgrproj_bits(SgrprojInfo *sgrproj_info, - SgrprojInfo *ref_sgrproj_info) { - int bits = SGRPROJ_PARAMS_BITS; - const sgr_params_type *params = &sgr_params[sgrproj_info->ep]; - if (params->r[0] > 0) - bits += aom_count_primitive_refsubexpfin( - SGRPROJ_PRJ_MAX0 - SGRPROJ_PRJ_MIN0 + 1, SGRPROJ_PRJ_SUBEXP_K, - ref_sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0, - sgrproj_info->xqd[0] - SGRPROJ_PRJ_MIN0); - if (params->r[1] > 0) - bits += aom_count_primitive_refsubexpfin( - SGRPROJ_PRJ_MAX1 - SGRPROJ_PRJ_MIN1 + 1, SGRPROJ_PRJ_SUBEXP_K, - ref_sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1, - sgrproj_info->xqd[1] - SGRPROJ_PRJ_MIN1); - return bits; -} - -static void search_sgrproj(const RestorationTileLimits *limits, - const AV1PixelRect *tile, int rest_unit_idx, - void *priv, int32_t *tmpbuf, - RestorationLineBuffers *rlbs) { - (void)rlbs; - RestSearchCtxt *rsc = (RestSearchCtxt *)priv; - RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx]; - - const MACROBLOCK *const x = rsc->x; - const AV1_COMMON *const cm = rsc->cm; - const int highbd = cm->seq_params.use_highbitdepth; - const int bit_depth = cm->seq_params.bit_depth; - - uint8_t *dgd_start = - rsc->dgd_buffer + limits->v_start * rsc->dgd_stride + limits->h_start; - const uint8_t *src_start = - rsc->src_buffer + limits->v_start * rsc->src_stride + limits->h_start; - - const int is_uv = rsc->plane > 0; - const int ss_x = is_uv && cm->seq_params.subsampling_x; - const int ss_y = is_uv && cm->seq_params.subsampling_y; - const int procunit_width = RESTORATION_PROC_UNIT_SIZE >> ss_x; - const int procunit_height = RESTORATION_PROC_UNIT_SIZE >> ss_y; - - rusi->sgrproj = 
search_selfguided_restoration( - dgd_start, limits->h_end - limits->h_start, - limits->v_end - limits->v_start, rsc->dgd_stride, src_start, - rsc->src_stride, highbd, bit_depth, procunit_width, procunit_height, - tmpbuf); - - RestorationUnitInfo rui; - rui.restoration_type = RESTORE_SGRPROJ; - rui.sgrproj_info = rusi->sgrproj; - - rusi->sse[RESTORE_SGRPROJ] = try_restoration_unit(rsc, limits, tile, &rui); - - const int64_t bits_none = x->sgrproj_restore_cost[0]; - const int64_t bits_sgr = x->sgrproj_restore_cost[1] + - (count_sgrproj_bits(&rusi->sgrproj, &rsc->sgrproj) - << AV1_PROB_COST_SHIFT); - - double cost_none = - RDCOST_DBL(x->rdmult, bits_none >> 4, rusi->sse[RESTORE_NONE]); - double cost_sgr = - RDCOST_DBL(x->rdmult, bits_sgr >> 4, rusi->sse[RESTORE_SGRPROJ]); - if (rusi->sgrproj.ep < 10) - cost_sgr *= (1 + DUAL_SGR_PENALTY_MULT * rsc->sf->dual_sgr_penalty_level); - - RestorationType rtype = - (cost_sgr < cost_none) ? RESTORE_SGRPROJ : RESTORE_NONE; - rusi->best_rtype[RESTORE_SGRPROJ - 1] = rtype; - - rsc->sse += rusi->sse[rtype]; - rsc->bits += (cost_sgr < cost_none) ? 
bits_sgr : bits_none; - if (cost_sgr < cost_none) rsc->sgrproj = rusi->sgrproj; -} - -void av1_compute_stats_c(int wiener_win, const uint8_t *dgd, const uint8_t *src, - int h_start, int h_end, int v_start, int v_end, - int dgd_stride, int src_stride, double *M, double *H) { - int i, j, k, l; - double Y[WIENER_WIN2]; - const int wiener_win2 = wiener_win * wiener_win; - const int wiener_halfwin = (wiener_win >> 1); - const double avg = - find_average(dgd, h_start, h_end, v_start, v_end, dgd_stride); - - memset(M, 0, sizeof(*M) * wiener_win2); - memset(H, 0, sizeof(*H) * wiener_win2 * wiener_win2); - for (i = v_start; i < v_end; i++) { - for (j = h_start; j < h_end; j++) { - const double X = (double)src[i * src_stride + j] - avg; - int idx = 0; - for (k = -wiener_halfwin; k <= wiener_halfwin; k++) { - for (l = -wiener_halfwin; l <= wiener_halfwin; l++) { - Y[idx] = (double)dgd[(i + l) * dgd_stride + (j + k)] - avg; - idx++; - } - } - assert(idx == wiener_win2); - for (k = 0; k < wiener_win2; ++k) { - M[k] += Y[k] * X; - for (l = k; l < wiener_win2; ++l) { - // H is a symmetric matrix, so we only need to fill out the upper - // triangle here. We can copy it down to the lower triangle outside - // the (i, j) loops. 
- H[k * wiener_win2 + l] += Y[k] * Y[l]; - } - } - } - } - for (k = 0; k < wiener_win2; ++k) { - for (l = k + 1; l < wiener_win2; ++l) { - H[l * wiener_win2 + k] = H[k * wiener_win2 + l]; - } - } -} - -static double find_average_highbd(const uint16_t *src, int h_start, int h_end, - int v_start, int v_end, int stride) { - uint64_t sum = 0; - double avg = 0; - int i, j; - aom_clear_system_state(); - for (i = v_start; i < v_end; i++) - for (j = h_start; j < h_end; j++) sum += src[i * stride + j]; - avg = (double)sum / ((v_end - v_start) * (h_end - h_start)); - return avg; -} - -static AOM_FORCE_INLINE void compute_stats_highbd( - int wiener_win, const uint8_t *dgd8, const uint8_t *src8, int h_start, - int h_end, int v_start, int v_end, int dgd_stride, int src_stride, - double *M, double *H) { - int i, j, k, l; - double Y[WIENER_WIN2]; - const int wiener_win2 = wiener_win * wiener_win; - const int wiener_halfwin = (wiener_win >> 1); - const uint16_t *src = CONVERT_TO_SHORTPTR(src8); - const uint16_t *dgd = CONVERT_TO_SHORTPTR(dgd8); - const double avg = - find_average_highbd(dgd, h_start, h_end, v_start, v_end, dgd_stride); - - memset(M, 0, sizeof(*M) * wiener_win2); - memset(H, 0, sizeof(*H) * wiener_win2 * wiener_win2); - for (i = v_start; i < v_end; i++) { - for (j = h_start; j < h_end; j++) { - const double X = (double)src[i * src_stride + j] - avg; - int idx = 0; - for (k = -wiener_halfwin; k <= wiener_halfwin; k++) { - for (l = -wiener_halfwin; l <= wiener_halfwin; l++) { - Y[idx] = (double)dgd[(i + l) * dgd_stride + (j + k)] - avg; - idx++; - } - } - assert(idx == wiener_win2); - for (k = 0; k < wiener_win2; ++k) { - double Yk = Y[k]; - M[k] += Yk * X; - double *H2 = &H[k * wiener_win2]; - H2[k] += Yk * Yk; - for (l = k + 1; l < wiener_win2; ++l) { - // H is a symmetric matrix, so we only need to fill out the upper - // triangle here. We can copy it down to the lower triangle outside - // the (i, j) loops. 
- H2[l] += Yk * Y[l]; - } - } - } - } - for (k = 0; k < wiener_win2; ++k) { - for (l = k + 1; l < wiener_win2; ++l) { - H[l * wiener_win2 + k] = H[k * wiener_win2 + l]; - } - } -} - -static INLINE int wrap_index(int i, int wiener_win) { - const int wiener_halfwin1 = (wiener_win >> 1) + 1; - return (i >= wiener_halfwin1 ? wiener_win - 1 - i : i); -} - -// Fix vector b, update vector a -static void update_a_sep_sym(int wiener_win, double **Mc, double **Hc, - double *a, double *b) { - int i, j; - double S[WIENER_WIN]; - double A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1]; - const int wiener_win2 = wiener_win * wiener_win; - const int wiener_halfwin1 = (wiener_win >> 1) + 1; - memset(A, 0, sizeof(A)); - memset(B, 0, sizeof(B)); - for (i = 0; i < wiener_win; i++) { - for (j = 0; j < wiener_win; ++j) { - const int jj = wrap_index(j, wiener_win); - A[jj] += Mc[i][j] * b[i]; - } - } - for (i = 0; i < wiener_win; i++) { - for (j = 0; j < wiener_win; j++) { - int k, l; - for (k = 0; k < wiener_win; ++k) - for (l = 0; l < wiener_win; ++l) { - const int kk = wrap_index(k, wiener_win); - const int ll = wrap_index(l, wiener_win); - B[ll * wiener_halfwin1 + kk] += - Hc[j * wiener_win + i][k * wiener_win2 + l] * b[i] * b[j]; - } - } - } - // Normalization enforcement in the system of equations itself - for (i = 0; i < wiener_halfwin1 - 1; ++i) - A[i] -= - A[wiener_halfwin1 - 1] * 2 + - B[i * wiener_halfwin1 + wiener_halfwin1 - 1] - - 2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 + (wiener_halfwin1 - 1)]; - for (i = 0; i < wiener_halfwin1 - 1; ++i) - for (j = 0; j < wiener_halfwin1 - 1; ++j) - B[i * wiener_halfwin1 + j] -= - 2 * (B[i * wiener_halfwin1 + (wiener_halfwin1 - 1)] + - B[(wiener_halfwin1 - 1) * wiener_halfwin1 + j] - - 2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 + - (wiener_halfwin1 - 1)]); - if (linsolve(wiener_halfwin1 - 1, B, wiener_halfwin1, A, S)) { - S[wiener_halfwin1 - 1] = 1.0; - for (i = wiener_halfwin1; i < wiener_win; ++i) { - S[i] = S[wiener_win 
- 1 - i]; - S[wiener_halfwin1 - 1] -= 2 * S[i]; - } - memcpy(a, S, wiener_win * sizeof(*a)); - } -} - -// Fix vector a, update vector b -static void update_b_sep_sym(int wiener_win, double **Mc, double **Hc, - double *a, double *b) { - int i, j; - double S[WIENER_WIN]; - double A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1]; - const int wiener_win2 = wiener_win * wiener_win; - const int wiener_halfwin1 = (wiener_win >> 1) + 1; - memset(A, 0, sizeof(A)); - memset(B, 0, sizeof(B)); - for (i = 0; i < wiener_win; i++) { - const int ii = wrap_index(i, wiener_win); - for (j = 0; j < wiener_win; j++) A[ii] += Mc[i][j] * a[j]; - } - - for (i = 0; i < wiener_win; i++) { - for (j = 0; j < wiener_win; j++) { - const int ii = wrap_index(i, wiener_win); - const int jj = wrap_index(j, wiener_win); - int k, l; - for (k = 0; k < wiener_win; ++k) - for (l = 0; l < wiener_win; ++l) - B[jj * wiener_halfwin1 + ii] += - Hc[i * wiener_win + j][k * wiener_win2 + l] * a[k] * a[l]; - } - } - // Normalization enforcement in the system of equations itself - for (i = 0; i < wiener_halfwin1 - 1; ++i) - A[i] -= - A[wiener_halfwin1 - 1] * 2 + - B[i * wiener_halfwin1 + wiener_halfwin1 - 1] - - 2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 + (wiener_halfwin1 - 1)]; - for (i = 0; i < wiener_halfwin1 - 1; ++i) - for (j = 0; j < wiener_halfwin1 - 1; ++j) - B[i * wiener_halfwin1 + j] -= - 2 * (B[i * wiener_halfwin1 + (wiener_halfwin1 - 1)] + - B[(wiener_halfwin1 - 1) * wiener_halfwin1 + j] - - 2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 + - (wiener_halfwin1 - 1)]); - if (linsolve(wiener_halfwin1 - 1, B, wiener_halfwin1, A, S)) { - S[wiener_halfwin1 - 1] = 1.0; - for (i = wiener_halfwin1; i < wiener_win; ++i) { - S[i] = S[wiener_win - 1 - i]; - S[wiener_halfwin1 - 1] -= 2 * S[i]; - } - memcpy(b, S, wiener_win * sizeof(*b)); - } -} - -static int wiener_decompose_sep_sym(int wiener_win, double *M, double *H, - double *a, double *b) { - static const int init_filt[WIENER_WIN] = { - 
WIENER_FILT_TAP0_MIDV, WIENER_FILT_TAP1_MIDV, WIENER_FILT_TAP2_MIDV, - WIENER_FILT_TAP3_MIDV, WIENER_FILT_TAP2_MIDV, WIENER_FILT_TAP1_MIDV, - WIENER_FILT_TAP0_MIDV, - }; - double *Hc[WIENER_WIN2]; - double *Mc[WIENER_WIN]; - int i, j, iter; - const int plane_off = (WIENER_WIN - wiener_win) >> 1; - const int wiener_win2 = wiener_win * wiener_win; - for (i = 0; i < wiener_win; i++) { - a[i] = b[i] = (double)init_filt[i + plane_off] / WIENER_FILT_STEP; - } - for (i = 0; i < wiener_win; i++) { - Mc[i] = M + i * wiener_win; - for (j = 0; j < wiener_win; j++) { - Hc[i * wiener_win + j] = - H + i * wiener_win * wiener_win2 + j * wiener_win; - } - } - - iter = 1; - while (iter < NUM_WIENER_ITERS) { - update_a_sep_sym(wiener_win, Mc, Hc, a, b); - update_b_sep_sym(wiener_win, Mc, Hc, a, b); - iter++; - } - return 1; -} - -// Computes the function x'*H*x - x'*M for the learned 2D filter x, and compares -// against identity filters; Final score is defined as the difference between -// the function values -static double compute_score(int wiener_win, double *M, double *H, - InterpKernel vfilt, InterpKernel hfilt) { - double ab[WIENER_WIN * WIENER_WIN]; - int i, k, l; - double P = 0, Q = 0; - double iP = 0, iQ = 0; - double Score, iScore; - double a[WIENER_WIN], b[WIENER_WIN]; - const int plane_off = (WIENER_WIN - wiener_win) >> 1; - const int wiener_win2 = wiener_win * wiener_win; - - aom_clear_system_state(); - - a[WIENER_HALFWIN] = b[WIENER_HALFWIN] = 1.0; - for (i = 0; i < WIENER_HALFWIN; ++i) { - a[i] = a[WIENER_WIN - i - 1] = (double)vfilt[i] / WIENER_FILT_STEP; - b[i] = b[WIENER_WIN - i - 1] = (double)hfilt[i] / WIENER_FILT_STEP; - a[WIENER_HALFWIN] -= 2 * a[i]; - b[WIENER_HALFWIN] -= 2 * b[i]; - } - memset(ab, 0, sizeof(ab)); - for (k = 0; k < wiener_win; ++k) { - for (l = 0; l < wiener_win; ++l) - ab[k * wiener_win + l] = a[l + plane_off] * b[k + plane_off]; - } - for (k = 0; k < wiener_win2; ++k) { - P += ab[k] * M[k]; - for (l = 0; l < wiener_win2; ++l) - Q += ab[k] * 
H[k * wiener_win2 + l] * ab[l]; - } - Score = Q - 2 * P; - - iP = M[wiener_win2 >> 1]; - iQ = H[(wiener_win2 >> 1) * wiener_win2 + (wiener_win2 >> 1)]; - iScore = iQ - 2 * iP; - - return Score - iScore; -} - -static void quantize_sym_filter(int wiener_win, double *f, InterpKernel fi) { - int i; - const int wiener_halfwin = (wiener_win >> 1); - for (i = 0; i < wiener_halfwin; ++i) { - fi[i] = RINT(f[i] * WIENER_FILT_STEP); - } - // Specialize for 7-tap filter - if (wiener_win == WIENER_WIN) { - fi[0] = CLIP(fi[0], WIENER_FILT_TAP0_MINV, WIENER_FILT_TAP0_MAXV); - fi[1] = CLIP(fi[1], WIENER_FILT_TAP1_MINV, WIENER_FILT_TAP1_MAXV); - fi[2] = CLIP(fi[2], WIENER_FILT_TAP2_MINV, WIENER_FILT_TAP2_MAXV); - } else { - fi[2] = CLIP(fi[1], WIENER_FILT_TAP2_MINV, WIENER_FILT_TAP2_MAXV); - fi[1] = CLIP(fi[0], WIENER_FILT_TAP1_MINV, WIENER_FILT_TAP1_MAXV); - fi[0] = 0; - } - // Satisfy filter constraints - fi[WIENER_WIN - 1] = fi[0]; - fi[WIENER_WIN - 2] = fi[1]; - fi[WIENER_WIN - 3] = fi[2]; - // The central element has an implicit +WIENER_FILT_STEP - fi[3] = -2 * (fi[0] + fi[1] + fi[2]); -} - -static int count_wiener_bits(int wiener_win, WienerInfo *wiener_info, - WienerInfo *ref_wiener_info) { - int bits = 0; - if (wiener_win == WIENER_WIN) - bits += aom_count_primitive_refsubexpfin( - WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1, - WIENER_FILT_TAP0_SUBEXP_K, - ref_wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV, - wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV); - bits += aom_count_primitive_refsubexpfin( - WIENER_FILT_TAP1_MAXV - WIENER_FILT_TAP1_MINV + 1, - WIENER_FILT_TAP1_SUBEXP_K, - ref_wiener_info->vfilter[1] - WIENER_FILT_TAP1_MINV, - wiener_info->vfilter[1] - WIENER_FILT_TAP1_MINV); - bits += aom_count_primitive_refsubexpfin( - WIENER_FILT_TAP2_MAXV - WIENER_FILT_TAP2_MINV + 1, - WIENER_FILT_TAP2_SUBEXP_K, - ref_wiener_info->vfilter[2] - WIENER_FILT_TAP2_MINV, - wiener_info->vfilter[2] - WIENER_FILT_TAP2_MINV); - if (wiener_win == WIENER_WIN) - bits += 
aom_count_primitive_refsubexpfin( - WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1, - WIENER_FILT_TAP0_SUBEXP_K, - ref_wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV, - wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV); - bits += aom_count_primitive_refsubexpfin( - WIENER_FILT_TAP1_MAXV - WIENER_FILT_TAP1_MINV + 1, - WIENER_FILT_TAP1_SUBEXP_K, - ref_wiener_info->hfilter[1] - WIENER_FILT_TAP1_MINV, - wiener_info->hfilter[1] - WIENER_FILT_TAP1_MINV); - bits += aom_count_primitive_refsubexpfin( - WIENER_FILT_TAP2_MAXV - WIENER_FILT_TAP2_MINV + 1, - WIENER_FILT_TAP2_SUBEXP_K, - ref_wiener_info->hfilter[2] - WIENER_FILT_TAP2_MINV, - wiener_info->hfilter[2] - WIENER_FILT_TAP2_MINV); - return bits; -} - -#define USE_WIENER_REFINEMENT_SEARCH 1 -static int64_t finer_tile_search_wiener(const RestSearchCtxt *rsc, - const RestorationTileLimits *limits, - const AV1PixelRect *tile, - RestorationUnitInfo *rui, - int wiener_win) { - const int plane_off = (WIENER_WIN - wiener_win) >> 1; - int64_t err = try_restoration_unit(rsc, limits, tile, rui); -#if USE_WIENER_REFINEMENT_SEARCH - int64_t err2; - int tap_min[] = { WIENER_FILT_TAP0_MINV, WIENER_FILT_TAP1_MINV, - WIENER_FILT_TAP2_MINV }; - int tap_max[] = { WIENER_FILT_TAP0_MAXV, WIENER_FILT_TAP1_MAXV, - WIENER_FILT_TAP2_MAXV }; - - WienerInfo *plane_wiener = &rui->wiener_info; - - // printf("err pre = %"PRId64"\n", err); - const int start_step = 4; - for (int s = start_step; s >= 1; s >>= 1) { - for (int p = plane_off; p < WIENER_HALFWIN; ++p) { - int skip = 0; - do { - if (plane_wiener->hfilter[p] - s >= tap_min[p]) { - plane_wiener->hfilter[p] -= s; - plane_wiener->hfilter[WIENER_WIN - p - 1] -= s; - plane_wiener->hfilter[WIENER_HALFWIN] += 2 * s; - err2 = try_restoration_unit(rsc, limits, tile, rui); - if (err2 > err) { - plane_wiener->hfilter[p] += s; - plane_wiener->hfilter[WIENER_WIN - p - 1] += s; - plane_wiener->hfilter[WIENER_HALFWIN] -= 2 * s; - } else { - err = err2; - skip = 1; - // At the highest step size continue 
moving in the same direction - if (s == start_step) continue; - } - } - break; - } while (1); - if (skip) break; - do { - if (plane_wiener->hfilter[p] + s <= tap_max[p]) { - plane_wiener->hfilter[p] += s; - plane_wiener->hfilter[WIENER_WIN - p - 1] += s; - plane_wiener->hfilter[WIENER_HALFWIN] -= 2 * s; - err2 = try_restoration_unit(rsc, limits, tile, rui); - if (err2 > err) { - plane_wiener->hfilter[p] -= s; - plane_wiener->hfilter[WIENER_WIN - p - 1] -= s; - plane_wiener->hfilter[WIENER_HALFWIN] += 2 * s; - } else { - err = err2; - // At the highest step size continue moving in the same direction - if (s == start_step) continue; - } - } - break; - } while (1); - } - for (int p = plane_off; p < WIENER_HALFWIN; ++p) { - int skip = 0; - do { - if (plane_wiener->vfilter[p] - s >= tap_min[p]) { - plane_wiener->vfilter[p] -= s; - plane_wiener->vfilter[WIENER_WIN - p - 1] -= s; - plane_wiener->vfilter[WIENER_HALFWIN] += 2 * s; - err2 = try_restoration_unit(rsc, limits, tile, rui); - if (err2 > err) { - plane_wiener->vfilter[p] += s; - plane_wiener->vfilter[WIENER_WIN - p - 1] += s; - plane_wiener->vfilter[WIENER_HALFWIN] -= 2 * s; - } else { - err = err2; - skip = 1; - // At the highest step size continue moving in the same direction - if (s == start_step) continue; - } - } - break; - } while (1); - if (skip) break; - do { - if (plane_wiener->vfilter[p] + s <= tap_max[p]) { - plane_wiener->vfilter[p] += s; - plane_wiener->vfilter[WIENER_WIN - p - 1] += s; - plane_wiener->vfilter[WIENER_HALFWIN] -= 2 * s; - err2 = try_restoration_unit(rsc, limits, tile, rui); - if (err2 > err) { - plane_wiener->vfilter[p] -= s; - plane_wiener->vfilter[WIENER_WIN - p - 1] -= s; - plane_wiener->vfilter[WIENER_HALFWIN] += 2 * s; - } else { - err = err2; - // At the highest step size continue moving in the same direction - if (s == start_step) continue; - } - } - break; - } while (1); - } - } -// printf("err post = %"PRId64"\n", err); -#endif // USE_WIENER_REFINEMENT_SEARCH - return err; -} 
- -static void search_wiener(const RestorationTileLimits *limits, - const AV1PixelRect *tile_rect, int rest_unit_idx, - void *priv, int32_t *tmpbuf, - RestorationLineBuffers *rlbs) { - (void)tmpbuf; - (void)rlbs; - RestSearchCtxt *rsc = (RestSearchCtxt *)priv; - RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx]; - - const int wiener_win = - (rsc->plane == AOM_PLANE_Y) ? WIENER_WIN : WIENER_WIN_CHROMA; - - double M[WIENER_WIN2]; - double H[WIENER_WIN2 * WIENER_WIN2]; - double vfilterd[WIENER_WIN], hfilterd[WIENER_WIN]; - - const AV1_COMMON *const cm = rsc->cm; - if (cm->seq_params.use_highbitdepth) { - compute_stats_highbd(wiener_win, rsc->dgd_buffer, rsc->src_buffer, - limits->h_start, limits->h_end, limits->v_start, - limits->v_end, rsc->dgd_stride, rsc->src_stride, M, H); - } else { - av1_compute_stats(wiener_win, rsc->dgd_buffer, rsc->src_buffer, - limits->h_start, limits->h_end, limits->v_start, - limits->v_end, rsc->dgd_stride, rsc->src_stride, M, H); - } - - const MACROBLOCK *const x = rsc->x; - const int64_t bits_none = x->wiener_restore_cost[0]; - - if (!wiener_decompose_sep_sym(wiener_win, M, H, vfilterd, hfilterd)) { - rsc->bits += bits_none; - rsc->sse += rusi->sse[RESTORE_NONE]; - rusi->best_rtype[RESTORE_WIENER - 1] = RESTORE_NONE; - rusi->sse[RESTORE_WIENER] = INT64_MAX; - return; - } - - RestorationUnitInfo rui; - memset(&rui, 0, sizeof(rui)); - rui.restoration_type = RESTORE_WIENER; - quantize_sym_filter(wiener_win, vfilterd, rui.wiener_info.vfilter); - quantize_sym_filter(wiener_win, hfilterd, rui.wiener_info.hfilter); - - // Filter score computes the value of the function x'*A*x - x'*b for the - // learned filter and compares it against identity filer. 
If there is no - // reduction in the function, the filter is reverted back to identity - if (compute_score(wiener_win, M, H, rui.wiener_info.vfilter, - rui.wiener_info.hfilter) > 0) { - rsc->bits += bits_none; - rsc->sse += rusi->sse[RESTORE_NONE]; - rusi->best_rtype[RESTORE_WIENER - 1] = RESTORE_NONE; - rusi->sse[RESTORE_WIENER] = INT64_MAX; - return; - } - - aom_clear_system_state(); - - rusi->sse[RESTORE_WIENER] = - finer_tile_search_wiener(rsc, limits, tile_rect, &rui, wiener_win); - rusi->wiener = rui.wiener_info; - - if (wiener_win != WIENER_WIN) { - assert(rui.wiener_info.vfilter[0] == 0 && - rui.wiener_info.vfilter[WIENER_WIN - 1] == 0); - assert(rui.wiener_info.hfilter[0] == 0 && - rui.wiener_info.hfilter[WIENER_WIN - 1] == 0); - } - - const int64_t bits_wiener = - x->wiener_restore_cost[1] + - (count_wiener_bits(wiener_win, &rusi->wiener, &rsc->wiener) - << AV1_PROB_COST_SHIFT); - - double cost_none = - RDCOST_DBL(x->rdmult, bits_none >> 4, rusi->sse[RESTORE_NONE]); - double cost_wiener = - RDCOST_DBL(x->rdmult, bits_wiener >> 4, rusi->sse[RESTORE_WIENER]); - - RestorationType rtype = - (cost_wiener < cost_none) ? RESTORE_WIENER : RESTORE_NONE; - rusi->best_rtype[RESTORE_WIENER - 1] = rtype; - - rsc->sse += rusi->sse[rtype]; - rsc->bits += (cost_wiener < cost_none) ? 
bits_wiener : bits_none; - if (cost_wiener < cost_none) rsc->wiener = rusi->wiener; -} - -static void search_norestore(const RestorationTileLimits *limits, - const AV1PixelRect *tile_rect, int rest_unit_idx, - void *priv, int32_t *tmpbuf, - RestorationLineBuffers *rlbs) { - (void)tile_rect; - (void)tmpbuf; - (void)rlbs; - - RestSearchCtxt *rsc = (RestSearchCtxt *)priv; - RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx]; - - const int highbd = rsc->cm->seq_params.use_highbitdepth; - rusi->sse[RESTORE_NONE] = sse_restoration_unit( - limits, rsc->src, rsc->cm->frame_to_show, rsc->plane, highbd); - - rsc->sse += rusi->sse[RESTORE_NONE]; -} - -static void search_switchable(const RestorationTileLimits *limits, - const AV1PixelRect *tile_rect, int rest_unit_idx, - void *priv, int32_t *tmpbuf, - RestorationLineBuffers *rlbs) { - (void)limits; - (void)tile_rect; - (void)tmpbuf; - (void)rlbs; - RestSearchCtxt *rsc = (RestSearchCtxt *)priv; - RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx]; - - const MACROBLOCK *const x = rsc->x; - - const int wiener_win = - (rsc->plane == AOM_PLANE_Y) ? WIENER_WIN : WIENER_WIN_CHROMA; - - double best_cost = 0; - int64_t best_bits = 0; - RestorationType best_rtype = RESTORE_NONE; - - for (RestorationType r = 0; r < RESTORE_SWITCHABLE_TYPES; ++r) { - // Check for the condition that wiener or sgrproj search could not - // find a solution or the solution was worse than RESTORE_NONE. - // In either case the best_rtype will be set as RESTORE_NONE. These - // should be skipped from the test below. 
- if (r > RESTORE_NONE) { - if (rusi->best_rtype[r - 1] == RESTORE_NONE) continue; - } - - const int64_t sse = rusi->sse[r]; - int64_t coeff_pcost = 0; - switch (r) { - case RESTORE_NONE: coeff_pcost = 0; break; - case RESTORE_WIENER: - coeff_pcost = - count_wiener_bits(wiener_win, &rusi->wiener, &rsc->wiener); - break; - case RESTORE_SGRPROJ: - coeff_pcost = count_sgrproj_bits(&rusi->sgrproj, &rsc->sgrproj); - break; - default: assert(0); break; - } - const int64_t coeff_bits = coeff_pcost << AV1_PROB_COST_SHIFT; - const int64_t bits = x->switchable_restore_cost[r] + coeff_bits; - double cost = RDCOST_DBL(x->rdmult, bits >> 4, sse); - if (r == RESTORE_SGRPROJ && rusi->sgrproj.ep < 10) - cost *= (1 + DUAL_SGR_PENALTY_MULT * rsc->sf->dual_sgr_penalty_level); - if (r == 0 || cost < best_cost) { - best_cost = cost; - best_bits = bits; - best_rtype = r; - } - } - - rusi->best_rtype[RESTORE_SWITCHABLE - 1] = best_rtype; - - rsc->sse += rusi->sse[best_rtype]; - rsc->bits += best_bits; - if (best_rtype == RESTORE_WIENER) rsc->wiener = rusi->wiener; - if (best_rtype == RESTORE_SGRPROJ) rsc->sgrproj = rusi->sgrproj; -} - -static void copy_unit_info(RestorationType frame_rtype, - const RestUnitSearchInfo *rusi, - RestorationUnitInfo *rui) { - assert(frame_rtype > 0); - rui->restoration_type = rusi->best_rtype[frame_rtype - 1]; - if (rui->restoration_type == RESTORE_WIENER) - rui->wiener_info = rusi->wiener; - else - rui->sgrproj_info = rusi->sgrproj; -} - -static double search_rest_type(RestSearchCtxt *rsc, RestorationType rtype) { - static const rest_unit_visitor_t funs[RESTORE_TYPES] = { - search_norestore, search_wiener, search_sgrproj, search_switchable - }; - - reset_rsc(rsc); - rsc_on_tile(LR_TILE_ROW, LR_TILE_COL, rsc); - av1_foreach_rest_unit_in_plane(rsc->cm, rsc->plane, funs[rtype], rsc, - &rsc->tile_rect, rsc->cm->rst_tmpbuf, NULL); - return RDCOST_DBL(rsc->x->rdmult, rsc->bits >> 4, rsc->sse); -} - -static int rest_tiles_in_plane(const AV1_COMMON *cm, int plane) 
{ - const RestorationInfo *rsi = &cm->rst_info[plane]; - return rsi->units_per_tile; -} - -void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - assert(!cm->all_lossless); - - int ntiles[2]; - for (int is_uv = 0; is_uv < 2; ++is_uv) - ntiles[is_uv] = rest_tiles_in_plane(cm, is_uv); - - assert(ntiles[1] <= ntiles[0]); - RestUnitSearchInfo *rusi = - (RestUnitSearchInfo *)aom_memalign(16, sizeof(*rusi) * ntiles[0]); - - // If the restoration unit dimensions are not multiples of - // rsi->restoration_unit_size then some elements of the rusi array may be - // left uninitialised when we reach copy_unit_info(...). This is not a - // problem, as these elements are ignored later, but in order to quiet - // Valgrind's warnings we initialise the array below. - memset(rusi, 0, sizeof(*rusi) * ntiles[0]); - cpi->td.mb.rdmult = cpi->rd.RDMULT; - - RestSearchCtxt rsc; - const int plane_start = AOM_PLANE_Y; - const int plane_end = num_planes > 1 ? AOM_PLANE_V : AOM_PLANE_Y; - for (int plane = plane_start; plane <= plane_end; ++plane) { - init_rsc(src, &cpi->common, &cpi->td.mb, &cpi->sf, plane, rusi, - &cpi->trial_frame_rst, &rsc); - - const int plane_ntiles = ntiles[plane > 0]; - const RestorationType num_rtypes = - (plane_ntiles > 1) ? 
RESTORE_TYPES : RESTORE_SWITCHABLE_TYPES; - - double best_cost = 0; - RestorationType best_rtype = RESTORE_NONE; - - const int highbd = rsc.cm->seq_params.use_highbitdepth; - extend_frame(rsc.dgd_buffer, rsc.plane_width, rsc.plane_height, - rsc.dgd_stride, RESTORATION_BORDER, RESTORATION_BORDER, - highbd); - - for (RestorationType r = 0; r < num_rtypes; ++r) { - if ((force_restore_type != RESTORE_TYPES) && (r != RESTORE_NONE) && - (r != force_restore_type)) - continue; - - double cost = search_rest_type(&rsc, r); - - if (r == 0 || cost < best_cost) { - best_cost = cost; - best_rtype = r; - } - } - - cm->rst_info[plane].frame_restoration_type = best_rtype; - if (force_restore_type != RESTORE_TYPES) - assert(best_rtype == force_restore_type || best_rtype == RESTORE_NONE); - - if (best_rtype != RESTORE_NONE) { - for (int u = 0; u < plane_ntiles; ++u) { - copy_unit_info(best_rtype, &rusi[u], &cm->rst_info[plane].unit_info[u]); - } - } - } - - aom_free(rusi); -} diff --git a/third_party/aom/av1/encoder/pickrst.h b/third_party/aom/av1/encoder/pickrst.h deleted file mode 100644 index 3fec0c34b..000000000 --- a/third_party/aom/av1/encoder/pickrst.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ -#ifndef AOM_AV1_ENCODER_PICKRST_H_ -#define AOM_AV1_ENCODER_PICKRST_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "av1/encoder/encoder.h" -#include "aom_ports/system_state.h" - -struct yv12_buffer_config; -struct AV1_COMP; - -static const uint8_t g_shuffle_stats_data[16] = { - 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, -}; - -static INLINE double find_average(const uint8_t *src, int h_start, int h_end, - int v_start, int v_end, int stride) { - uint64_t sum = 0; - double avg = 0; - int i, j; - aom_clear_system_state(); - for (i = v_start; i < v_end; i++) - for (j = h_start; j < h_end; j++) sum += src[i * stride + j]; - avg = (double)sum / ((v_end - v_start) * (h_end - h_start)); - return avg; -} - -void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_PICKRST_H_ diff --git a/third_party/aom/av1/encoder/pustats.h b/third_party/aom/av1/encoder/pustats.h deleted file mode 100644 index 40dd46768..000000000 --- a/third_party/aom/av1/encoder/pustats.h +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_ENCODER_PUSTATS_H_ -#define AOM_AV1_ENCODER_PUSTATS_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "av1/encoder/ml.h" - -#define NUM_FEATURES_PUSTATS 8 -#define NUM_HIDDEN_LAYERS 2 -#define HIDDEN_LAYERS_0_NODES 12 -#define HIDDEN_LAYERS_1_NODES 10 -#define LOGITS_NODES 1 - -static const float - av1_pustats_rate_hiddenlayer_0_kernel[NUM_FEATURES_PUSTATS * - HIDDEN_LAYERS_0_NODES] = { - -0.1758f, -0.0499f, -10.0069f, -2.2838f, -0.3359f, 0.3459f, -0.3285f, - -0.0515f, -0.5417f, 0.2357f, -0.0575f, -69.0782f, 0.5348f, 1.4068f, - 0.2213f, -1.0490f, -0.0636f, 0.1654f, 1.1002f, 33.4924f, 0.4358f, - 1.2499f, 0.1143f, 0.0592f, -1.6335f, -0.0092f, 1.2207f, -28.4543f, - -0.4973f, 0.4368f, 0.2341f, -0.1623f, -3.8986f, 0.1311f, -1.8789f, - -3.9079f, -0.8158f, -0.8420f, 1.4295f, -2.3629f, -1.4825f, 0.6498f, - -5.3669f, 6.4434f, 1.8393f, -35.0678f, 3.7459f, -2.8504f, 2.0502f, - -0.1812f, -3.9011f, -1.0155f, 1.8375f, -1.4517f, 1.3917f, 3.8664f, - 0.8345f, -0.3472f, 5.7740f, -1.1196f, -0.3264f, -1.2481f, -0.9284f, - -4.9657f, 2.2831f, 0.7337f, 2.3176f, 0.6416f, 0.8804f, 1.9988f, - -1.3426f, 1.2728f, 1.2249f, -0.1551f, 5.6045f, 0.2046f, -2.1464f, - -2.4922f, -0.5334f, 12.1055f, 7.2467f, -0.0070f, 0.0234f, 0.0021f, - 0.0215f, -0.0098f, -0.0682f, -6.1494f, -0.3176f, -1.6069f, -0.2119f, - -1.0533f, -0.3566f, 0.5294f, -0.4335f, 0.1626f, - }; - -static const float av1_pustats_rate_hiddenlayer_0_bias[HIDDEN_LAYERS_0_NODES] = - { - 10.5266f, 5.3268f, -1.0678f, 7.7411f, 8.7164f, -0.3235f, - 7.3028f, 9.0874f, -6.4594f, -1.0102f, -1.1146f, 10.8419f, - }; - -static const float - av1_pustats_rate_hiddenlayer_1_kernel[HIDDEN_LAYERS_0_NODES * - HIDDEN_LAYERS_1_NODES] = { - 10.5932f, 2.5192f, -0.0015f, 5.9479f, 5.2426f, -0.4091f, 5.3220f, - 6.0469f, 0.7200f, 3.3241f, 5.5006f, 12.8290f, -1.6396f, 0.5743f, - -0.8370f, 1.9956f, -4.9270f, -1.5295f, 2.1350f, -9.4415f, -0.7094f, - 5.1822f, 19.7287f, -3.0444f, -0.3320f, 0.0031f, -0.2709f, -0.5249f, - 0.3281f, -0.2240f, 
0.2225f, -0.2386f, -0.4370f, -0.2438f, -0.4928f, - -0.2842f, -2.1772f, 9.2570f, -17.6655f, 3.5448f, -2.8394f, -1.0167f, - -0.5115f, -1.9260f, -0.2111f, -0.7528f, -1.2387f, -0.0401f, 5.0716f, - -3.3763f, -0.2898f, -0.4956f, -7.9993f, 0.1526f, -0.0242f, 0.7354f, - 6.0432f, 4.8043f, 7.4790f, -0.6295f, 1.7565f, 3.7197f, -2.3963f, - 6.8945f, 2.9717f, -3.1623f, 3.4241f, 4.4676f, -1.8154f, -2.9401f, - -8.5657f, -3.0240f, -1.4661f, 8.1145f, -12.7858f, 3.3624f, -1.0819f, - -4.2856f, 1.1801f, -0.5587f, -1.6062f, -1.1813f, -3.5882f, -0.2490f, - -24.9566f, -0.4140f, -0.1113f, 3.5537f, 4.4112f, 0.1367f, -1.5876f, - 1.6605f, 1.3903f, -0.0253f, -2.1419f, -2.2197f, -0.7659f, -0.4249f, - -0.0424f, 0.1486f, 0.4643f, -0.9068f, -0.3619f, -0.7624f, -0.9132f, - -0.4947f, -0.3527f, -0.5445f, -0.4768f, -1.7761f, -1.0686f, 0.5462f, - 1.3371f, 4.3116f, 0.0777f, -2.7216f, -1.8908f, 3.4989f, 7.7269f, - -2.7566f, - }; - -static const float av1_pustats_rate_hiddenlayer_1_bias[HIDDEN_LAYERS_1_NODES] = - { - 13.2435f, -8.5477f, -0.0998f, -1.5131f, -12.0187f, - 6.1715f, 0.5094f, 7.6433f, -0.3992f, -1.3555f, - }; - -static const float - av1_pustats_rate_logits_kernel[HIDDEN_LAYERS_1_NODES * LOGITS_NODES] = { - 4.3078f, -17.3497f, 0.0195f, 34.6032f, -5.0127f, - 5.3079f, 10.0077f, -13.129f, 0.0087f, -8.4009f, - }; - -static const float av1_pustats_rate_logits_bias[LOGITS_NODES] = { - 4.5103f, -}; - -static const NN_CONFIG av1_pustats_rate_nnconfig = { - NUM_FEATURES_PUSTATS, // num_inputs - LOGITS_NODES, // num_outputs - NUM_HIDDEN_LAYERS, // num_hidden_layers - { HIDDEN_LAYERS_0_NODES, HIDDEN_LAYERS_1_NODES }, // num_hidden_nodes - { - av1_pustats_rate_hiddenlayer_0_kernel, - av1_pustats_rate_hiddenlayer_1_kernel, - av1_pustats_rate_logits_kernel, - }, - { - av1_pustats_rate_hiddenlayer_0_bias, - av1_pustats_rate_hiddenlayer_1_bias, - av1_pustats_rate_logits_bias, - }, -}; - -static const float - av1_pustats_dist_hiddenlayer_0_kernel[NUM_FEATURES_PUSTATS * - HIDDEN_LAYERS_0_NODES] = { - -0.2560f, 
0.1105f, -0.8434f, -0.0132f, -8.9371f, -1.1176f, -0.3655f, - 0.4885f, 1.7518f, 0.4985f, 0.5582f, -0.3739f, 0.9403f, 0.3874f, - 0.3265f, 1.7383f, 3.1747f, 0.0285f, 3.3942f, -0.0123f, 0.5057f, - 0.1584f, 0.2697f, 4.6151f, 3.6251f, -0.0121f, -1.0047f, -0.0037f, - 0.0127f, 0.1935f, -0.5277f, -2.7144f, 0.0729f, -0.1457f, -0.0816f, - -0.5462f, 0.4738f, 0.3599f, -0.0564f, 0.0910f, 0.0126f, -0.0310f, - -2.1311f, -0.4666f, -0.0074f, -0.0765f, 0.0287f, -0.2662f, -0.0999f, - -0.2983f, -0.4899f, -0.2314f, 0.2873f, -0.3614f, 0.1783f, -0.1210f, - 0.3569f, 0.5436f, -8.0536f, -0.0044f, -1.5255f, -0.8247f, -0.4556f, - 1.9045f, 0.5463f, 0.1102f, -0.9293f, -0.0185f, -0.8302f, -0.4378f, - -0.3531f, -1.3095f, 0.6099f, 0.7977f, 4.1950f, -0.0067f, -0.2762f, - -0.1574f, -0.2149f, 0.6104f, -1.7053f, 0.1904f, 4.2402f, -0.2671f, - 0.8940f, 0.6820f, 0.2241f, -0.9459f, 1.4571f, 0.5255f, 2.3352f, - -0.0806f, 0.5231f, 0.3928f, 0.4146f, 2.0956f, - }; - -static const float av1_pustats_dist_hiddenlayer_0_bias[HIDDEN_LAYERS_0_NODES] = - { - 1.1597f, 0.0836f, -0.7471f, -0.2439f, -0.0438f, 2.4626f, - 0.f, 1.1485f, 2.7085f, -4.7897f, 1.4093f, -1.657f, - }; - -static const float - av1_pustats_dist_hiddenlayer_1_kernel[HIDDEN_LAYERS_0_NODES * - HIDDEN_LAYERS_1_NODES] = { - -0.5203f, -1.3468f, 0.3865f, -0.6859f, 0.0058f, 4.0682f, 0.4807f, - -0.1380f, 0.6050f, 0.8958f, 0.7748f, -0.1311f, 1.7317f, 1.1265f, - 0.0827f, 0.1407f, -0.3605f, 0.5429f, 0.1880f, -0.1439f, 0.2837f, - 1.6477f, 0.0832f, 0.0593f, -1.8464f, -0.7241f, -1.0672f, -0.3546f, - -0.3842f, -2.3637f, 0.2514f, 0.8263f, -0.1872f, 0.5774f, -0.3610f, - -0.0205f, 1.3977f, -0.1083f, 0.6923f, 1.3039f, -0.2870f, 1.0622f, - -0.0566f, 0.2697f, -0.5429f, -0.6193f, 1.7559f, 0.3246f, 1.9159f, - 0.3744f, 0.0686f, 1.0191f, -0.4212f, 1.9591f, -0.0691f, -0.1085f, - -1.2034f, 0.0606f, 1.0116f, 0.5565f, -0.1874f, -0.7898f, 0.4796f, - 0.2290f, 0.4334f, -0.5817f, -0.2949f, 0.1367f, -0.2932f, -1.1265f, - 0.0133f, -0.5309f, -3.3191f, 0.0939f, 0.3895f, -2.5812f, 
-0.0066f, - -3.0063f, -0.2982f, 0.7309f, -0.2422f, -0.2770f, -0.7152f, 0.1700f, - 1.9630f, 0.1988f, 0.4194f, 0.8762f, 0.3402f, 0.1051f, -0.1598f, - 0.2405f, 0.0392f, 1.1256f, 1.5245f, 0.0950f, 0.2160f, -0.5023f, - 0.2584f, 0.2074f, 0.2218f, 0.3966f, -0.0921f, -0.2435f, -0.4560f, - -1.1923f, -0.3716f, -0.3286f, -1.3225f, 0.1896f, -0.3342f, -0.7888f, - -0.4488f, -1.7168f, 0.3341f, 0.1146f, 0.5226f, 0.2610f, -0.4574f, - -0.4164f, - }; - -static const float av1_pustats_dist_hiddenlayer_1_bias[HIDDEN_LAYERS_1_NODES] = - { - -2.3014f, -2.4292f, 1.3317f, -3.2361f, -1.918f, - 2.7149f, -2.5649f, 2.7765f, 2.9617f, 2.7684f, - }; - -static const float - av1_pustats_dist_logits_kernel[HIDDEN_LAYERS_1_NODES * LOGITS_NODES] = { - -0.6868f, -0.6715f, 0.449f, -1.293f, 0.6214f, - 0.9894f, -0.4342f, 0.7002f, 1.4363f, 0.6951f, - }; - -static const float av1_pustats_dist_logits_bias[LOGITS_NODES] = { - 2.3371f, -}; - -static const NN_CONFIG av1_pustats_dist_nnconfig = { - NUM_FEATURES_PUSTATS, // num_inputs - LOGITS_NODES, // num_outputs - NUM_HIDDEN_LAYERS, // num_hidden_layers - { HIDDEN_LAYERS_0_NODES, HIDDEN_LAYERS_1_NODES }, // num_hidden_nodes - { - av1_pustats_dist_hiddenlayer_0_kernel, - av1_pustats_dist_hiddenlayer_1_kernel, - av1_pustats_dist_logits_kernel, - }, - { - av1_pustats_dist_hiddenlayer_0_bias, - av1_pustats_dist_hiddenlayer_1_bias, - av1_pustats_dist_logits_bias, - }, -}; - -#undef NUM_HIDDEN_LAYERS -#undef HIDDEN_LAYERS_0_NODES -#undef HIDDEN_LAYERS_1_NODES -#undef LOGITS_NODES - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_PUSTATS_H_ diff --git a/third_party/aom/av1/encoder/random.h b/third_party/aom/av1/encoder/random.h deleted file mode 100644 index 0bca39102..000000000 --- a/third_party/aom/av1/encoder/random.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2017, Alliance for Open Media. 
All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_ENCODER_RANDOM_H_ -#define AOM_AV1_ENCODER_RANDOM_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -// Generate a random number in the range [0, 32768). -static INLINE unsigned int lcg_rand16(unsigned int *state) { - *state = (unsigned int)(*state * 1103515245ULL + 12345); - return *state / 65536 % 32768; -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_RANDOM_H_ diff --git a/third_party/aom/av1/encoder/ransac.c b/third_party/aom/av1/encoder/ransac.c deleted file mode 100644 index 781f528eb..000000000 --- a/third_party/aom/av1/encoder/ransac.c +++ /dev/null @@ -1,603 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ -#include -#include -#include -#include -#include -#include - -#include "av1/encoder/ransac.h" -#include "av1/encoder/mathutils.h" -#include "av1/encoder/random.h" - -#define MAX_MINPTS 4 -#define MAX_DEGENERATE_ITER 10 -#define MINPTS_MULTIPLIER 5 - -#define INLIER_THRESHOLD 1.0 -#define MIN_TRIALS 20 - -//////////////////////////////////////////////////////////////////////////////// -// ransac -typedef int (*IsDegenerateFunc)(double *p); -typedef void (*NormalizeFunc)(double *p, int np, double *T); -typedef void (*DenormalizeFunc)(double *params, double *T1, double *T2); -typedef int (*FindTransformationFunc)(int points, double *points1, - double *points2, double *params); -typedef void (*ProjectPointsDoubleFunc)(double *mat, double *points, - double *proj, const int n, - const int stride_points, - const int stride_proj); - -static void project_points_double_translation(double *mat, double *points, - double *proj, const int n, - const int stride_points, - const int stride_proj) { - int i; - for (i = 0; i < n; ++i) { - const double x = *(points++), y = *(points++); - *(proj++) = x + mat[0]; - *(proj++) = y + mat[1]; - points += stride_points - 2; - proj += stride_proj - 2; - } -} - -static void project_points_double_rotzoom(double *mat, double *points, - double *proj, const int n, - const int stride_points, - const int stride_proj) { - int i; - for (i = 0; i < n; ++i) { - const double x = *(points++), y = *(points++); - *(proj++) = mat[2] * x + mat[3] * y + mat[0]; - *(proj++) = -mat[3] * x + mat[2] * y + mat[1]; - points += stride_points - 2; - proj += stride_proj - 2; - } -} - -static void project_points_double_affine(double *mat, double *points, - double *proj, const int n, - const int stride_points, - const int stride_proj) { - int i; - for (i = 0; i < n; ++i) { - const double x = *(points++), y = *(points++); - *(proj++) = mat[2] * x + mat[3] * y + mat[0]; - *(proj++) = mat[4] * x + mat[5] * y + mat[1]; - points += stride_points - 2; - proj += 
stride_proj - 2; - } -} - -static void normalize_homography(double *pts, int n, double *T) { - double *p = pts; - double mean[2] = { 0, 0 }; - double msqe = 0; - double scale; - int i; - - assert(n > 0); - for (i = 0; i < n; ++i, p += 2) { - mean[0] += p[0]; - mean[1] += p[1]; - } - mean[0] /= n; - mean[1] /= n; - for (p = pts, i = 0; i < n; ++i, p += 2) { - p[0] -= mean[0]; - p[1] -= mean[1]; - msqe += sqrt(p[0] * p[0] + p[1] * p[1]); - } - msqe /= n; - scale = (msqe == 0 ? 1.0 : sqrt(2) / msqe); - T[0] = scale; - T[1] = 0; - T[2] = -scale * mean[0]; - T[3] = 0; - T[4] = scale; - T[5] = -scale * mean[1]; - T[6] = 0; - T[7] = 0; - T[8] = 1; - for (p = pts, i = 0; i < n; ++i, p += 2) { - p[0] *= scale; - p[1] *= scale; - } -} - -static void invnormalize_mat(double *T, double *iT) { - double is = 1.0 / T[0]; - double m0 = -T[2] * is; - double m1 = -T[5] * is; - iT[0] = is; - iT[1] = 0; - iT[2] = m0; - iT[3] = 0; - iT[4] = is; - iT[5] = m1; - iT[6] = 0; - iT[7] = 0; - iT[8] = 1; -} - -static void denormalize_homography(double *params, double *T1, double *T2) { - double iT2[9]; - double params2[9]; - invnormalize_mat(T2, iT2); - multiply_mat(params, T1, params2, 3, 3, 3); - multiply_mat(iT2, params2, params, 3, 3, 3); -} - -static void denormalize_affine_reorder(double *params, double *T1, double *T2) { - double params_denorm[MAX_PARAMDIM]; - params_denorm[0] = params[0]; - params_denorm[1] = params[1]; - params_denorm[2] = params[4]; - params_denorm[3] = params[2]; - params_denorm[4] = params[3]; - params_denorm[5] = params[5]; - params_denorm[6] = params_denorm[7] = 0; - params_denorm[8] = 1; - denormalize_homography(params_denorm, T1, T2); - params[0] = params_denorm[2]; - params[1] = params_denorm[5]; - params[2] = params_denorm[0]; - params[3] = params_denorm[1]; - params[4] = params_denorm[3]; - params[5] = params_denorm[4]; - params[6] = params[7] = 0; -} - -static void denormalize_rotzoom_reorder(double *params, double *T1, - double *T2) { - double 
params_denorm[MAX_PARAMDIM]; - params_denorm[0] = params[0]; - params_denorm[1] = params[1]; - params_denorm[2] = params[2]; - params_denorm[3] = -params[1]; - params_denorm[4] = params[0]; - params_denorm[5] = params[3]; - params_denorm[6] = params_denorm[7] = 0; - params_denorm[8] = 1; - denormalize_homography(params_denorm, T1, T2); - params[0] = params_denorm[2]; - params[1] = params_denorm[5]; - params[2] = params_denorm[0]; - params[3] = params_denorm[1]; - params[4] = -params[3]; - params[5] = params[2]; - params[6] = params[7] = 0; -} - -static void denormalize_translation_reorder(double *params, double *T1, - double *T2) { - double params_denorm[MAX_PARAMDIM]; - params_denorm[0] = 1; - params_denorm[1] = 0; - params_denorm[2] = params[0]; - params_denorm[3] = 0; - params_denorm[4] = 1; - params_denorm[5] = params[1]; - params_denorm[6] = params_denorm[7] = 0; - params_denorm[8] = 1; - denormalize_homography(params_denorm, T1, T2); - params[0] = params_denorm[2]; - params[1] = params_denorm[5]; - params[2] = params[5] = 1; - params[3] = params[4] = 0; - params[6] = params[7] = 0; -} - -static int find_translation(int np, double *pts1, double *pts2, double *mat) { - int i; - double sx, sy, dx, dy; - double sumx, sumy; - - double T1[9], T2[9]; - normalize_homography(pts1, np, T1); - normalize_homography(pts2, np, T2); - - sumx = 0; - sumy = 0; - for (i = 0; i < np; ++i) { - dx = *(pts2++); - dy = *(pts2++); - sx = *(pts1++); - sy = *(pts1++); - - sumx += dx - sx; - sumy += dy - sy; - } - mat[0] = sumx / np; - mat[1] = sumy / np; - denormalize_translation_reorder(mat, T1, T2); - return 0; -} - -static int find_rotzoom(int np, double *pts1, double *pts2, double *mat) { - const int np2 = np * 2; - double *a = (double *)aom_malloc(sizeof(*a) * (np2 * 5 + 20)); - double *b = a + np2 * 4; - double *temp = b + np2; - int i; - double sx, sy, dx, dy; - - double T1[9], T2[9]; - normalize_homography(pts1, np, T1); - normalize_homography(pts2, np, T2); - - for (i = 0; i 
< np; ++i) { - dx = *(pts2++); - dy = *(pts2++); - sx = *(pts1++); - sy = *(pts1++); - - a[i * 2 * 4 + 0] = sx; - a[i * 2 * 4 + 1] = sy; - a[i * 2 * 4 + 2] = 1; - a[i * 2 * 4 + 3] = 0; - a[(i * 2 + 1) * 4 + 0] = sy; - a[(i * 2 + 1) * 4 + 1] = -sx; - a[(i * 2 + 1) * 4 + 2] = 0; - a[(i * 2 + 1) * 4 + 3] = 1; - - b[2 * i] = dx; - b[2 * i + 1] = dy; - } - if (!least_squares(4, a, np2, 4, b, temp, mat)) { - aom_free(a); - return 1; - } - denormalize_rotzoom_reorder(mat, T1, T2); - aom_free(a); - return 0; -} - -static int find_affine(int np, double *pts1, double *pts2, double *mat) { - const int np2 = np * 2; - double *a = (double *)aom_malloc(sizeof(*a) * (np2 * 7 + 42)); - double *b = a + np2 * 6; - double *temp = b + np2; - int i; - double sx, sy, dx, dy; - - double T1[9], T2[9]; - normalize_homography(pts1, np, T1); - normalize_homography(pts2, np, T2); - - for (i = 0; i < np; ++i) { - dx = *(pts2++); - dy = *(pts2++); - sx = *(pts1++); - sy = *(pts1++); - - a[i * 2 * 6 + 0] = sx; - a[i * 2 * 6 + 1] = sy; - a[i * 2 * 6 + 2] = 0; - a[i * 2 * 6 + 3] = 0; - a[i * 2 * 6 + 4] = 1; - a[i * 2 * 6 + 5] = 0; - a[(i * 2 + 1) * 6 + 0] = 0; - a[(i * 2 + 1) * 6 + 1] = 0; - a[(i * 2 + 1) * 6 + 2] = sx; - a[(i * 2 + 1) * 6 + 3] = sy; - a[(i * 2 + 1) * 6 + 4] = 0; - a[(i * 2 + 1) * 6 + 5] = 1; - - b[2 * i] = dx; - b[2 * i + 1] = dy; - } - if (!least_squares(6, a, np2, 6, b, temp, mat)) { - aom_free(a); - return 1; - } - denormalize_affine_reorder(mat, T1, T2); - aom_free(a); - return 0; -} - -static int get_rand_indices(int npoints, int minpts, int *indices, - unsigned int *seed) { - int i, j; - int ptr = lcg_rand16(seed) % npoints; - if (minpts > npoints) return 0; - indices[0] = ptr; - ptr = (ptr == npoints - 1 ? 0 : ptr + 1); - i = 1; - while (i < minpts) { - int index = lcg_rand16(seed) % npoints; - while (index) { - ptr = (ptr == npoints - 1 ? 
0 : ptr + 1); - for (j = 0; j < i; ++j) { - if (indices[j] == ptr) break; - } - if (j == i) index--; - } - indices[i++] = ptr; - } - return 1; -} - -typedef struct { - int num_inliers; - double variance; - int *inlier_indices; -} RANSAC_MOTION; - -// Return -1 if 'a' is a better motion, 1 if 'b' is better, 0 otherwise. -static int compare_motions(const void *arg_a, const void *arg_b) { - const RANSAC_MOTION *motion_a = (RANSAC_MOTION *)arg_a; - const RANSAC_MOTION *motion_b = (RANSAC_MOTION *)arg_b; - - if (motion_a->num_inliers > motion_b->num_inliers) return -1; - if (motion_a->num_inliers < motion_b->num_inliers) return 1; - if (motion_a->variance < motion_b->variance) return -1; - if (motion_a->variance > motion_b->variance) return 1; - return 0; -} - -static int is_better_motion(const RANSAC_MOTION *motion_a, - const RANSAC_MOTION *motion_b) { - return compare_motions(motion_a, motion_b) < 0; -} - -static void copy_points_at_indices(double *dest, const double *src, - const int *indices, int num_points) { - for (int i = 0; i < num_points; ++i) { - const int index = indices[i]; - dest[i * 2] = src[index * 2]; - dest[i * 2 + 1] = src[index * 2 + 1]; - } -} - -static const double kInfiniteVariance = 1e12; - -static void clear_motion(RANSAC_MOTION *motion, int num_points) { - motion->num_inliers = 0; - motion->variance = kInfiniteVariance; - memset(motion->inlier_indices, 0, - sizeof(*motion->inlier_indices * num_points)); -} - -static int ransac(const int *matched_points, int npoints, - int *num_inliers_by_motion, double *params_by_motion, - int num_desired_motions, const int minpts, - IsDegenerateFunc is_degenerate, - FindTransformationFunc find_transformation, - ProjectPointsDoubleFunc projectpoints) { - static const double PROBABILITY_REQUIRED = 0.9; - static const double EPS = 1e-12; - - int N = 10000, trial_count = 0; - int i = 0; - int ret_val = 0; - - unsigned int seed = (unsigned int)npoints; - - int indices[MAX_MINPTS] = { 0 }; - - double *points1, 
*points2; - double *corners1, *corners2; - double *image1_coord; - - // Store information for the num_desired_motions best transformations found - // and the worst motion among them, as well as the motion currently under - // consideration. - RANSAC_MOTION *motions, *worst_kept_motion = NULL; - RANSAC_MOTION current_motion; - - // Store the parameters and the indices of the inlier points for the motion - // currently under consideration. - double params_this_motion[MAX_PARAMDIM]; - - double *cnp1, *cnp2; - - for (i = 0; i < num_desired_motions; ++i) { - num_inliers_by_motion[i] = 0; - } - if (npoints < minpts * MINPTS_MULTIPLIER || npoints == 0) { - return 1; - } - - points1 = (double *)aom_malloc(sizeof(*points1) * npoints * 2); - points2 = (double *)aom_malloc(sizeof(*points2) * npoints * 2); - corners1 = (double *)aom_malloc(sizeof(*corners1) * npoints * 2); - corners2 = (double *)aom_malloc(sizeof(*corners2) * npoints * 2); - image1_coord = (double *)aom_malloc(sizeof(*image1_coord) * npoints * 2); - - motions = - (RANSAC_MOTION *)aom_malloc(sizeof(RANSAC_MOTION) * num_desired_motions); - for (i = 0; i < num_desired_motions; ++i) { - motions[i].inlier_indices = - (int *)aom_malloc(sizeof(*motions->inlier_indices) * npoints); - clear_motion(motions + i, npoints); - } - current_motion.inlier_indices = - (int *)aom_malloc(sizeof(*current_motion.inlier_indices) * npoints); - clear_motion(¤t_motion, npoints); - - worst_kept_motion = motions; - - if (!(points1 && points2 && corners1 && corners2 && image1_coord && motions && - current_motion.inlier_indices)) { - ret_val = 1; - goto finish_ransac; - } - - cnp1 = corners1; - cnp2 = corners2; - for (i = 0; i < npoints; ++i) { - *(cnp1++) = *(matched_points++); - *(cnp1++) = *(matched_points++); - *(cnp2++) = *(matched_points++); - *(cnp2++) = *(matched_points++); - } - - while (N > trial_count) { - double sum_distance = 0.0; - double sum_distance_squared = 0.0; - - clear_motion(¤t_motion, npoints); - - int degenerate = 
1; - int num_degenerate_iter = 0; - - while (degenerate) { - num_degenerate_iter++; - if (!get_rand_indices(npoints, minpts, indices, &seed)) { - ret_val = 1; - goto finish_ransac; - } - - copy_points_at_indices(points1, corners1, indices, minpts); - copy_points_at_indices(points2, corners2, indices, minpts); - - degenerate = is_degenerate(points1); - if (num_degenerate_iter > MAX_DEGENERATE_ITER) { - ret_val = 1; - goto finish_ransac; - } - } - - if (find_transformation(minpts, points1, points2, params_this_motion)) { - trial_count++; - continue; - } - - projectpoints(params_this_motion, corners1, image1_coord, npoints, 2, 2); - - for (i = 0; i < npoints; ++i) { - double dx = image1_coord[i * 2] - corners2[i * 2]; - double dy = image1_coord[i * 2 + 1] - corners2[i * 2 + 1]; - double distance = sqrt(dx * dx + dy * dy); - - if (distance < INLIER_THRESHOLD) { - current_motion.inlier_indices[current_motion.num_inliers++] = i; - sum_distance += distance; - sum_distance_squared += distance * distance; - } - } - - if (current_motion.num_inliers >= worst_kept_motion->num_inliers && - current_motion.num_inliers > 1) { - int temp; - double fracinliers, pNoOutliers, mean_distance, dtemp; - mean_distance = sum_distance / ((double)current_motion.num_inliers); - current_motion.variance = - sum_distance_squared / ((double)current_motion.num_inliers - 1.0) - - mean_distance * mean_distance * ((double)current_motion.num_inliers) / - ((double)current_motion.num_inliers - 1.0); - if (is_better_motion(¤t_motion, worst_kept_motion)) { - // This motion is better than the worst currently kept motion. Remember - // the inlier points and variance. The parameters for each kept motion - // will be recomputed later using only the inliers. 
- worst_kept_motion->num_inliers = current_motion.num_inliers; - worst_kept_motion->variance = current_motion.variance; - memcpy(worst_kept_motion->inlier_indices, current_motion.inlier_indices, - sizeof(*current_motion.inlier_indices) * npoints); - - assert(npoints > 0); - fracinliers = (double)current_motion.num_inliers / (double)npoints; - pNoOutliers = 1 - pow(fracinliers, minpts); - pNoOutliers = fmax(EPS, pNoOutliers); - pNoOutliers = fmin(1 - EPS, pNoOutliers); - dtemp = log(1.0 - PROBABILITY_REQUIRED) / log(pNoOutliers); - temp = (dtemp > (double)INT32_MAX) - ? INT32_MAX - : dtemp < (double)INT32_MIN ? INT32_MIN : (int)dtemp; - - if (temp > 0 && temp < N) { - N = AOMMAX(temp, MIN_TRIALS); - } - - // Determine the new worst kept motion and its num_inliers and variance. - for (i = 0; i < num_desired_motions; ++i) { - if (is_better_motion(worst_kept_motion, &motions[i])) { - worst_kept_motion = &motions[i]; - } - } - } - } - trial_count++; - } - - // Sort the motions, best first. - qsort(motions, num_desired_motions, sizeof(RANSAC_MOTION), compare_motions); - - // Recompute the motions using only the inliers. 
- for (i = 0; i < num_desired_motions; ++i) { - if (motions[i].num_inliers >= minpts) { - copy_points_at_indices(points1, corners1, motions[i].inlier_indices, - motions[i].num_inliers); - copy_points_at_indices(points2, corners2, motions[i].inlier_indices, - motions[i].num_inliers); - - find_transformation(motions[i].num_inliers, points1, points2, - params_by_motion + (MAX_PARAMDIM - 1) * i); - } - num_inliers_by_motion[i] = motions[i].num_inliers; - } - -finish_ransac: - aom_free(points1); - aom_free(points2); - aom_free(corners1); - aom_free(corners2); - aom_free(image1_coord); - aom_free(current_motion.inlier_indices); - for (i = 0; i < num_desired_motions; ++i) { - aom_free(motions[i].inlier_indices); - } - aom_free(motions); - - return ret_val; -} - -static int is_collinear3(double *p1, double *p2, double *p3) { - static const double collinear_eps = 1e-3; - const double v = - (p2[0] - p1[0]) * (p3[1] - p1[1]) - (p2[1] - p1[1]) * (p3[0] - p1[0]); - return fabs(v) < collinear_eps; -} - -static int is_degenerate_translation(double *p) { - return (p[0] - p[2]) * (p[0] - p[2]) + (p[1] - p[3]) * (p[1] - p[3]) <= 2; -} - -static int is_degenerate_affine(double *p) { - return is_collinear3(p, p + 2, p + 4); -} - -int ransac_translation(int *matched_points, int npoints, - int *num_inliers_by_motion, double *params_by_motion, - int num_desired_motions) { - return ransac(matched_points, npoints, num_inliers_by_motion, - params_by_motion, num_desired_motions, 3, - is_degenerate_translation, find_translation, - project_points_double_translation); -} - -int ransac_rotzoom(int *matched_points, int npoints, int *num_inliers_by_motion, - double *params_by_motion, int num_desired_motions) { - return ransac(matched_points, npoints, num_inliers_by_motion, - params_by_motion, num_desired_motions, 3, is_degenerate_affine, - find_rotzoom, project_points_double_rotzoom); -} - -int ransac_affine(int *matched_points, int npoints, int *num_inliers_by_motion, - double *params_by_motion, 
int num_desired_motions) { - return ransac(matched_points, npoints, num_inliers_by_motion, - params_by_motion, num_desired_motions, 3, is_degenerate_affine, - find_affine, project_points_double_affine); -} diff --git a/third_party/aom/av1/encoder/ransac.h b/third_party/aom/av1/encoder/ransac.h deleted file mode 100644 index c429f2ce5..000000000 --- a/third_party/aom/av1/encoder/ransac.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_ENCODER_RANSAC_H_ -#define AOM_AV1_ENCODER_RANSAC_H_ - -#include -#include -#include -#include - -#include "av1/common/warped_motion.h" - -typedef int (*RansacFunc)(int *matched_points, int npoints, - int *num_inliers_by_motion, double *params_by_motion, - int num_motions); - -/* Each of these functions fits a motion model from a set of - corresponding points in 2 frames using RANSAC. 
*/ -int ransac_affine(int *matched_points, int npoints, int *num_inliers_by_motion, - double *params_by_motion, int num_motions); -int ransac_rotzoom(int *matched_points, int npoints, int *num_inliers_by_motion, - double *params_by_motion, int num_motions); -int ransac_translation(int *matched_points, int npoints, - int *num_inliers_by_motion, double *params_by_motion, - int num_motions); -#endif // AOM_AV1_ENCODER_RANSAC_H_ diff --git a/third_party/aom/av1/encoder/rate_distortion_model_params.h b/third_party/aom/av1/encoder/rate_distortion_model_params.h deleted file mode 100644 index 7cd0962c5..000000000 --- a/third_party/aom/av1/encoder/rate_distortion_model_params.h +++ /dev/null @@ -1,591 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_ENCODER_RATE_DISTORTION_MODEL_PARAMS_H_ -#define AOM_AV1_ENCODER_RATE_DISTORTION_MODEL_PARAMS_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "av1/encoder/ml.h" - -// 22 float features + -// 2 categorical features with 4 possible values, converted to one-hot vectors. -// So, total 22 + 2 * 4 = 30 features. 
-#define NUM_FEATURES 30 -#define NUM_HIDDEN_LAYERS 1 -#define NUM_HIDDEN_NODES 96 -#define NUM_OUTPUTS 1 - -//------------------------------------------------------------------------------ -// RDCost model - -static const float - av1_rdcost_model_nn_weights_layer0[NUM_FEATURES * NUM_HIDDEN_NODES] = { - -0.0699f, 0.2790f, 0.1915f, 0.2669f, 0.4637f, 0.4095f, - 0.2129f, 0.0634f, 0.2306f, -0.2232f, -0.5711f, -0.6493f, - -0.7406f, -0.8440f, 0.4105f, 0.1392f, 0.5218f, -0.1618f, - -0.1719f, 0.3409f, 0.1111f, -0.3609f, -0.2929f, 0.3869f, - -0.5373f, 0.0700f, 0.2572f, 0.2483f, -0.0314f, 0.5228f, - 0.0169f, -0.1357f, 0.0419f, -0.1722f, 0.1303f, 0.1198f, - -0.0013f, 0.1309f, 0.0293f, -0.1941f, 0.0668f, -0.0643f, - -0.0381f, 0.1249f, -0.0731f, -0.1649f, 0.0964f, 0.0270f, - 0.1354f, 0.0538f, -0.2064f, -0.2067f, -0.0569f, 0.0449f, - 0.1680f, -0.0732f, -0.0785f, 0.1884f, -0.2137f, -0.0189f, - 0.2976f, 0.2818f, -0.0222f, 0.2658f, 0.0488f, 0.2778f, - -0.1110f, 0.2069f, -0.0072f, -0.0095f, -0.1105f, -0.1365f, - -0.4245f, -0.4751f, -0.0736f, 0.2333f, 0.0653f, -0.0249f, - 0.0055f, -0.0838f, -0.0489f, -0.2597f, 0.2621f, -0.0251f, - -0.0545f, 0.0816f, -0.0816f, 0.3396f, -0.1047f, 0.3678f, - 0.1487f, -0.0270f, 0.2574f, 0.1018f, 0.2560f, -0.0598f, - -0.0446f, -0.1792f, 0.5336f, -0.1590f, -0.9820f, -0.6514f, - -0.6304f, -0.8359f, -0.0699f, 0.0295f, -0.0057f, -0.3088f, - -0.1466f, 0.2220f, -0.1980f, -0.3400f, -0.1228f, 0.2667f, - -0.4816f, 0.0155f, -0.0194f, 0.2051f, 0.0513f, 0.1575f, - -121.4240f, -126.6840f, -124.1106f, -127.6184f, -85.0333f, -26.6396f, - 2.7020f, 102.0452f, -85.5128f, 0.0076f, 122.2206f, 107.5265f, - 108.3773f, 93.4847f, 20.3705f, -89.6993f, -176.9070f, -41.7543f, - -123.0293f, -91.6437f, -205.7099f, -62.5346f, -83.2987f, 21.3830f, - 56.6341f, -120.8647f, -127.7562f, -121.6688f, -127.4225f, -74.8045f, - -15.9247f, -14.6468f, -14.7788f, -15.4498f, -18.5514f, -11.1579f, - -5.8164f, -3.4318f, 0.8100f, 0.0642f, 203.5111f, 189.6872f, - 190.4776f, 176.4784f, -4.9427f, 
-12.5324f, -7.6861f, 21.9182f, - -6.7864f, -7.1906f, -8.1292f, 21.4780f, -7.8016f, -5.2653f, - 61.8526f, -15.5105f, -14.6900f, -14.1459f, -15.4350f, -19.1379f, - -0.7876f, -1.8558f, -4.6035f, -6.8405f, -0.2904f, 2.3202f, - 1.8127f, -2.9397f, -0.8187f, -0.6098f, 22.6173f, 10.3668f, - 12.9363f, 2.4541f, 6.6700f, 0.3804f, -3.3117f, 8.5464f, - -25.8348f, 1.8698f, -9.5753f, 8.5558f, -16.3576f, 7.2217f, - 35.3115f, -1.1447f, -2.6530f, -4.7027f, -5.7024f, -0.9513f, - 0.8393f, 0.7085f, 0.7879f, 0.3728f, 3.0574f, 1.1360f, - 26.0531f, 4.1084f, -1.7340f, 0.1683f, -450.7927f, -444.5818f, - -442.5239f, -438.1168f, 2.4924f, -0.0147f, -0.0797f, -47.5322f, - -1.7638f, -0.8608f, -0.6500f, -44.4326f, -0.9027f, 2.5560f, - -267.6517f, 0.2642f, 0.9457f, 0.7944f, 0.3609f, 3.2742f, - -74.3400f, -81.6894f, -76.2162f, -69.2979f, -90.2476f, -39.7389f, - 2.2545f, 36.5095f, -60.1129f, -1.0383f, 87.0348f, 83.9940f, - 83.7199f, 80.8609f, 14.9075f, -78.7405f, -74.3549f, -4.2382f, - -23.9739f, -91.8469f, -67.2654f, -21.5293f, -9.9857f, 11.8391f, - 35.8223f, -74.2551f, -81.0729f, -73.8347f, -70.3798f, -86.8052f, - 0.1701f, -0.1136f, 0.0060f, -0.0496f, -0.1727f, 0.0195f, - -0.1040f, 0.1027f, 0.0467f, -0.2538f, -0.1322f, 0.0860f, - 0.0093f, -0.2801f, -0.0958f, 0.0497f, -0.0582f, -0.0311f, - 0.1840f, 0.0752f, 0.0282f, 0.0297f, 0.0607f, 0.0650f, - 0.0893f, 0.1297f, 0.0373f, 0.0040f, -0.0973f, 0.0248f, - -0.1419f, 0.0322f, -0.0712f, 0.0860f, -0.0426f, -0.1989f, - 0.1393f, -0.1183f, 0.0735f, -0.1895f, 0.1447f, -0.0056f, - -0.1833f, 0.0884f, 0.0949f, 0.0476f, 0.0551f, 0.2125f, - -0.1537f, -0.0141f, -0.2182f, 0.1567f, 0.0457f, -0.1485f, - -0.1177f, 0.0391f, 0.1982f, -0.1288f, 0.1165f, -0.2019f, - 0.4550f, 0.5179f, 0.4311f, 0.1861f, 0.6199f, 0.4542f, - 0.2034f, 0.1128f, 1.3489f, -0.2525f, -2.1139f, -2.2444f, - -2.3679f, -2.3378f, 0.5682f, 0.1348f, 0.3032f, -1.5835f, - 0.2883f, 0.1693f, 0.0439f, -1.4556f, 0.3818f, 0.4875f, - -1.8899f, 0.2510f, 0.6450f, 0.6082f, 0.5962f, 0.8131f, - 12.0281f, 13.3899f, 
13.6249f, 15.8068f, -1.5453f, 6.7456f, - -6.0877f, 26.2596f, 6.2223f, -0.5922f, 134.1428f, 128.8985f, - 128.7538f, 123.0920f, 1.3207f, 18.3069f, 15.7436f, 46.5230f, - 24.7455f, 15.0688f, 19.9965f, 34.7236f, 19.7171f, 1.2018f, - 49.7274f, 11.8957f, 13.1578f, 14.0451f, 15.3544f, -3.5601f, - 1.0048f, 0.9479f, 1.1832f, 2.0635f, -2.9808f, 2.0803f, - -7.5815f, 8.4733f, -4.2008f, 0.1217f, 226.5257f, 210.7018f, - 211.6235f, 195.2605f, 0.8283f, 1.0977f, 1.4858f, 41.1242f, - 1.5822f, 0.8742f, 2.0440f, 33.6213f, 1.6177f, 0.9661f, - 65.0014f, 1.4197f, 1.0109f, 1.3153f, 1.5470f, -3.2833f, - 2.0858f, 2.0012f, 2.1088f, 2.5593f, -0.9422f, 1.8554f, - -6.5378f, 0.6780f, 2.3186f, 0.0506f, 218.3285f, 203.4055f, - 204.0362f, 188.7854f, 0.3701f, 2.5257f, 3.5172f, 28.8144f, - 2.1511f, 3.4676f, 2.6337f, 28.5113f, 2.4254f, -0.0548f, - 59.4511f, 2.0757f, 2.1551f, 2.2271f, 2.5300f, -1.4173f, - 91.9240f, 88.2142f, 83.6155f, 82.2482f, -9.2566f, 10.9654f, - -2.6974f, 62.6750f, -3.6298f, -0.1245f, 69.6721f, 67.1340f, - 66.9162f, 64.1994f, -83.6778f, 76.8107f, 69.7832f, 64.9261f, - 68.4901f, 76.3615f, 70.8108f, 63.5435f, 69.1973f, -83.6034f, - 24.8275f, 90.1923f, 87.6831f, 82.9783f, 81.8558f, -7.1010f, - 95.1656f, 88.3853f, 80.5835f, 79.5990f, -3.0720f, 8.1290f, - -0.6151f, 63.6425f, -4.5833f, -0.0063f, 70.1861f, 66.6250f, - 66.6148f, 63.0886f, -89.2863f, 74.7684f, 64.8897f, 60.4134f, - 62.5241f, 78.7076f, 61.7234f, 60.1688f, 61.9509f, -89.4098f, - 30.3361f, 92.9144f, 88.5954f, 79.6336f, 79.2453f, -0.4101f, - 0.6287f, 0.8050f, 0.4417f, 0.5419f, 0.5972f, 1.3037f, - 0.4316f, -0.0013f, -0.3673f, -0.4952f, 6.1773f, 5.7825f, - 6.1705f, 5.3848f, 1.7607f, -0.0152f, -0.2924f, 0.8199f, - 1.3326f, 0.7197f, -0.6332f, 1.1127f, 1.0472f, 1.8468f, - 3.4419f, 0.8233f, 0.7175f, 0.8514f, 0.6372f, 0.9472f, - -0.0813f, -0.0197f, -0.0096f, -0.2015f, 0.1133f, -0.0305f, - 0.0578f, 0.1375f, -0.0750f, -0.1702f, 0.1246f, -0.1782f, - 0.2017f, 0.0425f, -0.0602f, 0.1837f, 0.1044f, -0.1273f, - -0.1431f, 0.0672f, -0.1807f, 
-0.1045f, -0.1355f, -0.0497f, - -0.0561f, -0.0633f, 0.1907f, -0.0777f, 0.1203f, 0.0754f, - 0.4079f, 0.2001f, 0.0558f, 0.0622f, 0.2951f, 0.6541f, - -0.0068f, 0.1070f, 0.4469f, -0.1266f, -1.3035f, -1.3324f, - -1.3612f, -0.9966f, 0.7986f, 0.3192f, -0.5028f, -0.3844f, - -0.4079f, 0.6690f, -0.5109f, -0.2719f, -0.4958f, 1.0310f, - -0.8044f, 0.1447f, 0.4221f, 0.3194f, 0.3063f, 0.5520f, - 0.4667f, -5.7238f, -0.5602f, 12.6339f, -15.1865f, -14.9035f, - -3.0726f, 9.5347f, -24.6225f, -2.7086f, 89.8557f, 95.0657f, - 93.8693f, 99.1085f, -35.9483f, -18.0363f, -1.6298f, 25.3484f, - 39.3975f, -15.3199f, 5.7664f, 17.2367f, 25.2788f, -36.5648f, - 29.1426f, 0.3857f, -5.2117f, 0.0533f, 12.1707f, -11.1735f, - 0.2673f, 0.0090f, 0.1574f, 0.0904f, 0.0281f, 0.1144f, - 0.1123f, -0.0061f, 0.0954f, -0.0094f, -0.4387f, -0.5006f, - -0.2560f, -0.2326f, -0.1769f, 0.0465f, 0.1273f, -0.1627f, - 0.2987f, -0.3041f, 0.1131f, -0.3620f, 0.0932f, -0.0649f, - -0.4597f, 0.2535f, -0.0994f, 0.1390f, 0.1279f, 0.4207f, - -39.1159f, -42.6382f, -38.4225f, -31.2301f, -28.2382f, -28.1176f, - -9.5822f, 1.1886f, -1.2964f, -0.7908f, 154.9819f, 147.1914f, - 147.0482f, 138.7535f, -21.7014f, -35.7117f, -28.8802f, -3.8968f, - -21.5007f, -28.2213f, -28.4878f, -3.7558f, -26.8317f, -22.8491f, - 50.9464f, -37.0918f, -42.8811f, -39.3079f, -32.1904f, -26.6354f, - -72.5346f, -75.5751f, -72.6896f, -71.3671f, -35.3279f, -21.6077f, - -5.8259f, 38.7516f, -6.8012f, 0.0172f, 170.0685f, 157.4452f, - 158.2334f, 145.0102f, 10.0653f, -45.1775f, -56.4571f, -5.1165f, - -75.8980f, -46.8672f, -55.3642f, -6.5631f, -81.0258f, 10.1348f, - 55.9786f, -70.8124f, -75.7040f, -73.9831f, -70.8786f, -34.9723f, - 88.6239f, 86.5330f, 80.9333f, 79.6833f, -10.0096f, 10.6312f, - -4.2350f, 62.6230f, -3.2991f, -0.0843f, 75.8659f, 72.7886f, - 72.5301f, 68.8265f, -81.8276f, 70.3025f, 62.9511f, 62.5706f, - 69.1842f, 69.3637f, 65.4820f, 65.4357f, 71.5347f, -82.1064f, - 24.1925f, 86.2418f, 85.4985f, 80.4091f, 79.5378f, -9.3877f, - -7.6594f, -4.9581f, -10.6385f, 
-20.2307f, -44.2261f, -13.7557f, - -4.5344f, 18.1793f, -10.5522f, -1.5878f, 110.3187f, 102.4945f, - 102.3305f, 94.1324f, -25.2665f, 9.8172f, -4.4791f, 69.4972f, - -6.7571f, 5.8378f, -11.6101f, 70.7066f, -4.9327f, -24.0513f, - 41.4598f, -7.0600f, -7.0940f, -10.2478f, -18.9616f, -46.7505f, - 90.9365f, 86.0260f, 73.2934f, 69.3406f, 3.3863f, 3.8524f, - 0.6536f, 63.2150f, -10.6304f, 0.0291f, 73.0071f, 69.7660f, - 69.0457f, 65.5611f, -92.3379f, 74.2756f, 54.5025f, 84.3183f, - 53.7481f, 73.5624f, 55.3827f, 82.3242f, 53.5432f, -92.5355f, - 25.3457f, 89.1858f, 84.4763f, 72.9840f, 69.1889f, 4.6719f, - -0.0129f, 0.1995f, 0.2069f, 0.0358f, 0.1209f, -0.1185f, - -0.1217f, -0.1456f, 0.0125f, -0.1354f, 0.0510f, -0.0572f, - 0.1397f, 0.1453f, -0.0086f, 0.0107f, 0.0232f, 0.1508f, - 0.0884f, -0.0967f, -0.1786f, 0.1361f, -0.1399f, -0.2021f, - -0.0242f, -0.2169f, 0.0133f, 0.0116f, -0.1489f, -0.0093f, - -0.0796f, 0.1507f, 0.0906f, 0.0228f, -0.0166f, -0.1875f, - 0.0471f, 0.1184f, -0.0007f, -0.2732f, -0.1386f, -0.2057f, - -0.0213f, -0.1699f, 0.0996f, 0.1562f, 0.1850f, -0.0362f, - -0.2059f, 0.0258f, -0.0135f, -0.1276f, 0.0034f, 0.2023f, - 0.0857f, -0.0085f, -0.1955f, -0.1666f, -0.0920f, 0.0971f, - -0.0292f, -0.0512f, -0.0753f, -0.0739f, -0.0873f, -0.1200f, - 0.0220f, -0.1359f, 0.2013f, -0.0445f, 0.1143f, -0.1484f, - -0.1556f, -0.0003f, 0.1711f, -0.0724f, -0.0531f, 0.1126f, - 0.0476f, -0.0057f, 0.0088f, 0.0792f, -0.0438f, -0.1118f, - -0.0244f, 0.0712f, 0.0930f, -0.0203f, 0.1662f, -0.0695f, - -12.3872f, -18.7022f, -13.4237f, -1.4731f, -18.6843f, -14.1515f, - -7.5057f, 40.2090f, -2.7774f, -1.8433f, 123.6006f, 119.0557f, - 118.2758f, 113.6423f, -32.6216f, -19.5865f, -16.2897f, 17.2068f, - 6.3559f, -17.8742f, 0.7098f, 11.5970f, -10.1104f, -33.1830f, - 39.5617f, -10.5499f, -17.8137f, -14.7185f, -2.6172f, -14.6004f, - 0.3893f, 0.4443f, 0.5305f, 0.3049f, 0.8316f, 0.8679f, - 0.2265f, 0.2393f, 1.1970f, -0.2891f, -1.8666f, -1.8266f, - -1.6984f, -1.8787f, 0.8706f, 0.4208f, 0.5076f, -0.8436f, - 
-0.1623f, 0.8008f, 0.1512f, -1.0839f, -0.3002f, 0.9263f, - -1.3031f, 0.5964f, 0.3413f, 0.5551f, 0.2618f, 0.7018f, - -0.1320f, -0.1944f, -0.0209f, -0.0877f, 0.0721f, -0.0840f, - 0.0589f, 0.1019f, 0.1927f, -0.2011f, -0.1117f, 0.1575f, - 0.1080f, -0.0516f, 0.2154f, -0.1231f, 0.0426f, -0.0522f, - -0.1824f, -0.1923f, -0.1206f, -0.1724f, -0.0798f, 0.0401f, - -0.2170f, 0.0293f, -0.0853f, 0.1517f, 0.2128f, -0.1934f, - 0.0406f, 0.0517f, 0.0822f, -0.0150f, 0.0943f, -0.0989f, - -0.1802f, -0.1453f, -0.1967f, -0.1797f, 0.1545f, -0.1217f, - 0.1755f, -0.1604f, -0.0515f, 0.0509f, 0.0310f, -0.1220f, - -0.1770f, -0.0157f, 0.1989f, -0.0069f, 0.1766f, 0.1267f, - -0.0517f, -0.0396f, 0.0346f, 0.1946f, 0.1162f, -0.1345f, - -106.6179f, -110.5917f, -107.5476f, -108.0601f, -61.1687f, -22.4247f, - 2.6632f, 109.5208f, -66.1177f, 0.0062f, 159.9339f, 144.7755f, - 145.5032f, 128.9872f, 18.9180f, -75.3569f, -105.0866f, -52.0704f, - -119.1299f, -74.7543f, -109.9468f, -59.0682f, -104.5754f, 19.2878f, - 67.2573f, -104.8061f, -111.8610f, -106.6751f, -107.3537f, -56.4758f, - -0.6967f, -0.8495f, -0.9586f, -1.0461f, 1.4522f, -0.2762f, - 28.2828f, 2.9157f, -2.1062f, 0.1566f, -467.2388f, -461.0685f, - -459.0092f, -453.8370f, 1.5422f, -0.8186f, -0.4884f, -53.0399f, - -2.0255f, -1.1348f, -1.1039f, -50.2489f, -1.4821f, 1.8021f, - -258.0319f, -1.0865f, -0.5542f, -1.0443f, -1.2732f, 1.8413f, - 0.2377f, 0.1937f, -0.0116f, 0.0935f, -0.0599f, 0.0118f, - -0.0875f, 0.0455f, -0.1301f, -0.1081f, -0.2622f, -0.1960f, - 0.0393f, -0.1490f, 0.1852f, -0.0964f, -0.0741f, 0.0419f, - 0.1162f, -0.0274f, 0.1200f, -0.0333f, -0.1337f, 0.2141f, - 0.0664f, 0.1044f, -0.1744f, 0.1060f, -0.1468f, 0.0679f, - 0.0218f, 0.0494f, 0.1064f, 0.1363f, 0.0013f, 0.1331f, - -0.2095f, 0.2088f, -0.0399f, -0.1811f, 0.0678f, -0.1974f, - 0.1855f, -0.0968f, -0.2008f, 0.0162f, -0.0096f, -0.1493f, - 0.2170f, -0.1248f, -0.2055f, 0.1276f, -0.0269f, -0.1697f, - -0.0662f, 0.1073f, -0.0029f, -0.1051f, -0.1573f, 0.2106f, - -0.2020f, -0.1565f, 0.0335f, 
-0.1818f, -0.1665f, 0.2169f, - 0.1974f, -0.1470f, -0.1738f, -0.2038f, 0.0558f, -0.0441f, - 0.0065f, -0.1485f, -0.1366f, -0.2131f, 0.1042f, 0.0349f, - -0.1804f, -0.1361f, -0.0116f, -0.1012f, -0.0860f, 0.0606f, - -0.2077f, 0.1826f, -0.1014f, -0.0721f, -0.1517f, 0.1022f, - -0.1110f, -0.0186f, 0.1505f, 0.1797f, 0.0911f, 0.0340f, - 0.1702f, -0.1404f, -0.0566f, -0.2744f, -0.1943f, -0.1871f, - 0.0046f, 0.0306f, -0.0436f, 0.1625f, -0.1302f, 0.0175f, - 0.1570f, -0.1425f, 0.0779f, 0.1398f, 0.0929f, 0.0897f, - 0.0458f, -0.0936f, 0.1321f, -0.1355f, 0.0974f, 0.0457f, - -73.3516f, -75.0655f, -72.1062f, -72.4624f, -34.8640f, -14.3727f, - -4.4720f, 66.4982f, -18.8358f, 0.0397f, 174.2172f, 160.4959f, - 161.1034f, 147.3250f, 9.5507f, -45.0180f, -73.1609f, -1.5230f, - -74.8677f, -43.8559f, -68.7622f, -4.8971f, -82.1922f, 9.6490f, - 64.7115f, -71.8566f, -75.3879f, -72.5479f, -71.7161f, -34.8056f, - 0.1442f, 0.1558f, 0.1267f, -0.1261f, -0.0506f, -0.0823f, - -0.1807f, -0.0889f, -0.2098f, -0.1295f, -0.2046f, -0.1749f, - -0.1197f, -0.1380f, 0.0799f, -0.0889f, -0.1209f, 0.1919f, - 0.1947f, -0.2086f, -0.1042f, -0.0468f, 0.0232f, 0.1052f, - -0.0535f, 0.1398f, 0.1713f, -0.1522f, 0.1453f, 0.0286f, - -64.8503f, -67.6746f, -63.6497f, -60.4614f, -35.6091f, -20.1605f, - -3.6082f, 84.2801f, -37.8552f, -2.2371f, 132.4947f, 123.5057f, - 123.5776f, 113.9060f, -14.8772f, -40.7130f, -79.1391f, -10.7024f, - -65.7831f, -43.6078f, -79.6847f, -13.0743f, -69.2533f, -16.0171f, - 50.4868f, -64.3678f, -68.7061f, -64.0823f, -59.3413f, -28.9405f, - 77.1601f, 75.4899f, 69.8696f, 67.8764f, -22.7548f, 5.9814f, - -3.2826f, 57.9754f, -5.9500f, -0.0014f, 77.2251f, 74.0737f, - 73.7004f, 70.5072f, -80.9661f, 69.3065f, 55.8337f, 76.8831f, - 57.9902f, 63.4765f, 56.4748f, 70.0282f, 61.0874f, -81.3960f, - 26.2594f, 76.0367f, 74.9115f, 69.2361f, 66.9262f, -20.1637f, - 0.1886f, -0.1108f, 0.1262f, 0.0189f, 0.1382f, 0.0859f, - -0.1874f, -0.1986f, -0.0171f, -0.1400f, -0.2944f, -0.0750f, - -0.0395f, -0.2092f, -0.0878f, 0.1216f, 
-0.0870f, -0.1613f, - 0.2495f, 0.0754f, 0.0244f, -0.1205f, -0.0196f, -0.1729f, - 0.1170f, 0.1585f, 0.1482f, -0.1705f, -0.1337f, 0.0199f, - 13.0897f, 9.1111f, 6.7413f, 6.3907f, -28.1187f, 0.4556f, - -5.3116f, 30.7293f, -16.3644f, -0.0365f, 118.9118f, 111.6125f, - 111.3227f, 103.4680f, -30.1883f, 8.9328f, -4.1876f, 79.3936f, - -9.0522f, 12.7861f, -1.2736f, 78.0446f, -5.9485f, -30.5716f, - 27.8951f, 13.9613f, 6.7173f, 5.2345f, 8.3271f, -27.3705f, - 1.0488f, 1.0864f, 1.0710f, 1.7332f, -3.0561f, 1.1622f, - -7.6688f, 3.0491f, -1.3865f, 0.0769f, 222.5451f, 207.8170f, - 208.1767f, 193.1396f, 0.4447f, 2.1654f, 1.8929f, 35.1469f, - 1.1783f, 2.6199f, 1.1611f, 26.2989f, 3.4446f, 0.1551f, - 65.6529f, 1.2229f, 0.9851f, 1.0241f, 1.4373f, -3.3421f, - 0.1388f, 0.0756f, 0.2047f, 0.1140f, 0.0945f, 0.2038f, - 0.1038f, -0.2068f, -0.0626f, -0.1937f, 0.1347f, -0.0464f, - -0.0866f, 0.0250f, 0.0264f, -0.1556f, -0.1625f, 0.1028f, - -0.1255f, -0.0854f, 0.1033f, 0.0008f, -0.2133f, -0.0317f, - 0.1725f, -0.1054f, -0.1900f, 0.0383f, 0.0440f, -0.1900f, - -30.0811f, -30.9929f, -29.3194f, -26.8347f, -20.5957f, -4.1595f, - -1.9066f, 42.4707f, -9.0435f, 0.0064f, 175.7328f, 163.1350f, - 163.5085f, 151.1648f, 4.4620f, -20.6011f, -19.3402f, 1.5468f, - -32.0920f, -25.4581f, -12.3706f, -2.1636f, -32.4569f, 3.9365f, - 61.0117f, -28.4195f, -31.0837f, -30.2749f, -27.5522f, -22.8688f, - -0.3000f, 0.0092f, -0.3675f, -0.4113f, 0.0033f, 0.1138f, - 0.2182f, -0.5803f, 0.7507f, -0.2529f, -1.7724f, -1.4702f, - -1.5805f, -1.4294f, 0.1435f, -0.0168f, 0.2356f, -0.4373f, - -0.4500f, -0.4803f, -0.0041f, -0.3878f, 0.1321f, 0.2761f, - -1.1975f, -0.3509f, -0.0465f, -0.4050f, -0.1110f, 0.2233f, - 0.0950f, 0.0974f, -0.1600f, -0.1753f, -0.0328f, 0.0741f, - -0.0706f, 0.1839f, -0.0833f, -0.1367f, -0.1094f, -0.1739f, - -0.1069f, 0.0370f, -0.1404f, 0.1631f, -0.1570f, 0.2117f, - -0.1891f, 0.0395f, 0.1081f, 0.1760f, 0.0997f, 0.0853f, - -0.1018f, 0.1306f, -0.0924f, -0.2078f, 0.0801f, -0.0949f, - 0.5803f, 0.5578f, 0.4089f, 0.1912f, 
0.6774f, 0.3145f, - 0.3992f, -0.1316f, 1.3142f, -0.2457f, -2.3536f, -2.4939f, - -2.3165f, -2.4879f, 0.2321f, 0.1901f, 0.1789f, -1.5215f, - 0.2645f, 0.2231f, 0.2411f, -1.2361f, 0.2971f, 0.1421f, - -1.6715f, 0.3158f, 0.2476f, 0.3596f, 0.3029f, 0.9297f, - -88.8401f, -89.5209f, -86.1926f, -87.4196f, -39.6504f, -17.9684f, - -4.2702f, 80.2017f, -29.1676f, -0.4190f, 150.2820f, 138.4751f, - 139.1087f, 126.6569f, 13.7188f, -57.0739f, -80.3383f, -18.8351f, - -87.4103f, -56.0072f, -82.7707f, -23.1871f, -93.6787f, 13.9287f, - 59.6213f, -87.4843f, -90.4227f, -86.2635f, -86.6841f, -37.9086f, - 0.1184f, -0.2169f, -0.1915f, 0.0543f, 0.1253f, -0.1370f, - 0.0836f, -0.1198f, 0.1544f, -0.2004f, -0.1118f, -0.0786f, - 0.1517f, -0.1000f, -0.1055f, 0.0936f, -0.1579f, 0.1098f, - -0.0234f, -0.0499f, 0.0951f, -0.1711f, 0.0186f, -0.2008f, - 0.1777f, 0.1386f, -0.1495f, -0.0684f, -0.2149f, -0.1198f, - -0.6205f, -0.7209f, -0.5487f, -0.9080f, 1.3400f, 0.0085f, - 28.2837f, 3.2217f, -1.8463f, 0.1620f, -464.3599f, -458.4327f, - -455.9967f, -451.0393f, 1.6619f, -0.6944f, -0.3167f, -52.3630f, - -1.6971f, -0.7340f, -0.8923f, -49.2771f, -1.1177f, 1.8810f, - -258.9386f, -1.0765f, -0.7279f, -0.5208f, -0.8839f, 1.8175f, - -78.8510f, -80.5740f, -77.8843f, -77.9798f, -36.5560f, -16.0818f, - -5.5362f, 66.4228f, -16.8150f, 0.0036f, 181.8365f, 167.7181f, - 168.2344f, 153.9725f, 11.2659f, -47.5786f, -92.6978f, 6.7573f, - -68.7704f, -48.3850f, -95.3637f, 8.8888f, -76.9497f, 11.2243f, - 60.9020f, -77.6515f, -80.7610f, -78.4537f, -77.4659f, -36.2872f, - -0.0936f, 0.1966f, -0.2121f, 0.0193f, 0.0489f, -0.1445f, - 0.0060f, 0.0358f, -0.0783f, -0.0985f, -0.2072f, -0.0802f, - -0.0185f, 0.1868f, -0.0631f, 0.1260f, -0.0675f, 0.2167f, - -0.2174f, -0.1085f, 0.1483f, -0.1655f, -0.1040f, 0.1605f, - -0.1673f, -0.0148f, -0.1856f, -0.1454f, 0.1603f, -0.1620f, - -0.9205f, -1.2716f, -3.6561f, -5.0834f, -0.7934f, 1.8710f, - 2.2999f, -2.9516f, -1.7631f, -0.3804f, 41.2998f, 26.2358f, - 28.9763f, 15.7315f, 5.2164f, 3.2963f, -5.4457f, 
18.6310f, - -25.0076f, 5.4368f, -12.0085f, 17.1462f, -14.6992f, 5.6365f, - 48.6207f, -1.0921f, -1.8723f, -3.5354f, -5.1774f, -1.0200f, - -0.1065f, -0.2021f, 0.0332f, 0.1692f, -0.1239f, 0.1325f, - -0.0660f, -0.0567f, 0.2107f, -0.2084f, -0.0263f, 0.1411f, - 0.0178f, 0.0451f, 0.2024f, -0.1756f, -0.0771f, -0.1690f, - -0.2097f, -0.2130f, 0.0714f, 0.0172f, -0.0310f, 0.0649f, - -0.1550f, 0.0701f, 0.0306f, -0.1750f, -0.1988f, -0.2060f, - 0.0005f, -0.1325f, -0.1823f, -0.0900f, -0.1291f, -0.1817f, - 0.0144f, 0.0951f, -0.1954f, -0.0171f, -0.1985f, 0.0875f, - 0.0901f, -0.0857f, 0.1681f, 0.0465f, 0.1023f, 0.0985f, - -0.2152f, -0.1723f, -0.0825f, 0.0203f, -0.1206f, -0.1431f, - -0.1552f, 0.1344f, 0.0398f, 0.0169f, 0.2180f, -0.1530f, - 2.7964f, 2.7312f, 2.8831f, 3.4729f, -3.1366f, 2.4043f, - -7.2004f, 1.4128f, 2.8648f, 0.0578f, 225.5640f, 210.3712f, - 210.6907f, 195.0339f, 0.3140f, 1.8060f, 2.7355f, 33.6917f, - 3.3542f, 3.3682f, 1.7371f, 31.2424f, 3.4094f, -0.1192f, - 63.0864f, 3.0562f, 2.8633f, 2.6777f, 3.5495f, -4.2616f, - -1.4034f, 0.3930f, -4.6756f, -9.9870f, -27.8511f, 5.6071f, - -1.0862f, 34.4907f, -10.4831f, -0.0281f, 117.2617f, 104.9590f, - 106.1515f, 93.9707f, -16.8801f, 5.3036f, -21.7458f, 98.5306f, - -20.7596f, 6.4733f, -17.6440f, 98.3097f, -31.9540f, -17.0600f, - 27.4543f, -0.6140f, -1.6182f, -4.9167f, -8.9017f, -26.2485f, - -0.1952f, -0.0462f, -0.1958f, 0.1679f, -0.1592f, -0.1634f, - -0.0507f, -0.0542f, 0.0038f, -0.0343f, 0.0567f, -0.1983f, - 0.0250f, -0.0762f, 0.0902f, -0.0343f, 0.1240f, 0.1161f, - 0.1237f, 0.1870f, 0.0346f, 0.0340f, 0.0625f, -0.0355f, - 0.0278f, -0.1043f, 0.1755f, 0.0253f, 0.1750f, -0.2070f, - -5.5531f, -5.3122f, -4.9348f, -4.4782f, -7.5686f, -1.5478f, - -5.4341f, 0.5087f, -2.1382f, 0.0798f, 208.3677f, 194.0083f, - 194.4168f, 179.3082f, 1.4443f, -1.5038f, -1.4021f, 25.9363f, - -4.0635f, -2.6785f, -1.6640f, 22.2589f, -1.4910f, 1.4715f, - 59.1972f, -4.9638f, -5.1920f, -4.9193f, -5.2649f, -8.0556f, - 20.1226f, 12.0195f, 9.7385f, 10.7058f, -27.4201f, 
8.4869f, - -5.0826f, 32.9212f, -2.0674f, -0.0290f, 120.5002f, 112.3222f, - 112.3287f, 104.1107f, -20.6293f, 14.8534f, -0.8748f, 103.1141f, - -1.1368f, 15.3716f, 2.7653f, 91.7285f, -0.5991f, -20.7338f, - 35.9363f, 20.5104f, 11.1988f, 9.0368f, 10.6355f, -26.5309f, - -0.2058f, -0.2176f, 0.1331f, -0.1415f, -0.0825f, -0.0470f, - -0.0615f, 0.1274f, 0.0076f, -0.0575f, -0.2065f, 0.0866f, - 0.2166f, -0.1942f, -0.1952f, 0.1323f, -0.1016f, 0.1803f, - -0.0424f, 0.1555f, 0.1118f, 0.1559f, 0.0337f, -0.0341f, - -0.0430f, 0.1988f, -0.0553f, -0.0255f, 0.1817f, 0.0608f, - 0.1431f, 0.0686f, -0.0245f, -0.2107f, 0.2001f, -0.0964f, - -0.0090f, 0.1151f, -0.0365f, -0.1986f, 0.1740f, -0.2098f, - 0.0013f, 0.1369f, 0.1910f, 0.1801f, -0.2019f, 0.0348f, - -0.1175f, 0.0627f, -0.1929f, -0.0099f, 0.1349f, 0.1804f, - -0.1071f, -0.1651f, -0.1146f, -0.0259f, 0.1626f, -0.0271f, - 0.1393f, 0.1304f, -0.0200f, 0.0924f, -0.0839f, -0.0031f, - -0.1311f, 0.0350f, -0.1330f, -0.0911f, 0.1949f, -0.0209f, - -0.1883f, 0.0269f, 0.2040f, 0.1552f, 0.1532f, 0.1157f, - -0.1102f, -0.1220f, -0.0808f, -0.1050f, 0.1716f, 0.0846f, - -0.0180f, -0.1037f, 0.2063f, 0.1237f, 0.1253f, -0.0496f, - -0.0183f, 0.0491f, 0.1703f, -0.0824f, -0.0702f, -0.1100f, - -0.0965f, 0.0130f, -0.1222f, -0.1081f, 0.0329f, 0.2115f, - -0.1438f, 0.0799f, -0.1602f, -0.0330f, 0.0501f, 0.1072f, - -0.0744f, -0.1783f, -0.0240f, 0.0777f, -0.1944f, 0.0438f, - -0.0033f, -0.1873f, 0.0984f, -0.0318f, 0.0773f, 0.1489f, - 0.3966f, 0.4711f, 0.3972f, 0.0623f, 0.5970f, 0.1018f, - 0.1375f, -0.1881f, 0.8921f, -0.1854f, -2.1138f, -2.1178f, - -1.8295f, -2.1703f, 0.5784f, -0.1937f, -0.0728f, -0.9953f, - 0.2442f, -0.4074f, -0.1591f, -1.1660f, 0.4832f, 0.2203f, - -1.4957f, 0.1544f, 0.1810f, 0.2275f, 0.4075f, 0.8153f, - 0.0715f, 0.0222f, 0.0463f, -0.0201f, 0.0396f, 0.5951f, - -0.2779f, -0.0306f, 0.7532f, -0.1596f, -4.1080f, -3.7925f, - -3.8522f, -3.2468f, 0.7728f, 0.0188f, -0.1448f, 0.4084f, - -0.4666f, -0.1036f, -1.1469f, 0.4243f, 0.2778f, 0.9023f, - -3.0216f, 0.0384f, 
-0.3348f, -0.0314f, -0.2788f, 0.0479f, - 139.0773f, 131.6164f, 115.0392f, 111.1817f, 41.7596f, 9.5379f, - 1.8542f, 46.9890f, -12.8221f, 0.0241f, 52.9779f, 51.5268f, - 50.8060f, 48.7028f, -132.9665f, 118.3478f, 101.1239f, 81.4608f, - 75.4251f, 121.0643f, 97.8947f, 86.8911f, 74.5576f, -133.7606f, - 29.2657f, 135.8916f, 131.3661f, 114.1687f, 111.0784f, 31.3790f, - -0.0807f, -0.0657f, -0.0027f, 0.0410f, 0.0765f, 0.1194f, - 0.0953f, -0.0060f, 0.1531f, -0.2339f, 0.1488f, -0.0615f, - -0.0579f, 0.0761f, 0.1250f, -0.0469f, 0.1480f, 0.0683f, - -0.0049f, 0.1558f, 0.2168f, -0.0736f, 0.1135f, -0.1244f, - 0.0725f, -0.1297f, -0.0215f, -0.0412f, -0.1632f, -0.0200f, - -0.1346f, -0.1954f, 0.0053f, 0.0151f, 0.1379f, -0.1497f, - -0.0102f, -0.0336f, 0.0900f, -0.1706f, -0.0932f, -0.2084f, - 0.1242f, -0.2027f, 0.0849f, -0.2139f, -0.2015f, 0.0944f, - -0.0984f, 0.2082f, 0.1625f, -0.0227f, -0.1676f, 0.1021f, - 0.1516f, 0.0245f, 0.0955f, -0.1488f, -0.0057f, 0.1783f, - -0.8568f, -0.8175f, -0.6282f, -1.3107f, 1.5712f, 0.1044f, - 28.2289f, 3.0885f, -1.9829f, 0.1600f, -465.9583f, -459.5893f, - -457.5055f, -452.7600f, 1.7229f, -0.6620f, -0.1065f, -52.8017f, - -2.0293f, -0.8224f, -1.0389f, -49.9049f, -1.2250f, 1.7647f, - -259.2465f, -1.0978f, -0.5169f, -0.8721f, -0.8197f, 1.9158f, - 16.2234f, 15.8523f, 13.8343f, 9.8509f, -21.4326f, 15.7650f, - -6.4451f, 34.8575f, 1.1387f, -0.0223f, 117.7213f, 109.8494f, - 109.7624f, 101.8532f, -20.3275f, 16.0812f, 4.9165f, 92.4919f, - 4.1615f, 13.8451f, 9.2112f, 97.1580f, -8.7037f, -20.4420f, - 27.1105f, 17.4922f, 13.9998f, 12.3888f, 11.4705f, -20.9568f, - 0.5457f, 0.5322f, 0.2823f, 0.3581f, 0.5359f, 0.1576f, - 0.1969f, -0.0136f, -0.2748f, -0.3168f, -0.3918f, -0.2167f, - -0.1797f, -0.1869f, 0.2986f, -0.2116f, -0.4226f, -0.2022f, - 0.9452f, 0.5474f, -0.1218f, 0.2067f, -0.1600f, 0.1937f, - 0.0808f, 0.4877f, 0.5106f, 0.2626f, 0.5076f, 0.6228f, - 0.5124f, 0.4044f, 0.4023f, 0.1222f, 2.5446f, 0.9623f, - 24.9875f, 4.7442f, -2.0551f, 0.1642f, -449.9478f, -444.1841f, - 
-442.0153f, -437.1498f, 2.3209f, -0.6986f, -0.3456f, -47.4074f, - -1.2374f, -1.0939f, -0.9112f, -41.1851f, -0.5064f, 2.4209f, - -263.4446f, -0.0433f, 0.3460f, 0.1475f, 0.3770f, 2.9154f, - 0.2032f, 0.1527f, 0.2161f, -0.1981f, 0.1893f, -0.2003f, - 0.1734f, 0.1713f, 0.1207f, -0.2073f, -0.1018f, 0.0770f, - 0.0728f, 0.1665f, 0.0689f, 0.1884f, -0.1399f, -0.1326f, - -0.0518f, -0.1948f, 0.1576f, -0.1835f, 0.1436f, 0.0497f, - 0.0883f, -0.1253f, -0.0417f, -0.0507f, -0.1555f, 0.2076f, - -2.4080f, 6.1616f, -0.8564f, -13.6773f, -32.7238f, -16.3144f, - -1.9828f, 20.5110f, -17.0191f, -1.7154f, 103.6642f, 95.3675f, - 95.5662f, 86.9504f, -35.5340f, 19.6681f, -2.4900f, 65.0847f, - -15.8119f, 13.7256f, -4.6753f, 63.4713f, -6.5992f, -34.2369f, - 41.3959f, -1.5528f, 3.8106f, -0.7762f, -12.3204f, -35.1734f, - -83.9509f, -87.4861f, -83.5925f, -81.5047f, -54.1256f, -45.7506f, - -13.5325f, -6.0331f, -8.5062f, 0.0261f, 189.9450f, 177.7870f, - 178.6945f, 164.9762f, 9.8521f, -68.0619f, -68.6145f, 6.5056f, - -55.9651f, -66.9540f, -65.3349f, -2.1954f, -57.2408f, 8.6577f, - 60.6966f, -82.1056f, -88.5245f, -83.3057f, -80.7283f, -50.5285f, - -0.1397f, 0.1862f, -0.0691f, -0.0906f, 0.1560f, 0.1377f, - -0.0066f, -0.0213f, 0.0708f, -0.0386f, -0.0015f, -0.0020f, - -0.2122f, 0.0747f, 0.0795f, 0.0229f, 0.1923f, -0.1661f, - 0.0895f, 0.1176f, 0.1398f, -0.0443f, 0.0934f, 0.0638f, - -0.1924f, 0.0602f, 0.0404f, 0.1597f, 0.1387f, -0.0601f, - -28.3967f, -21.8483f, -25.5175f, -29.9252f, 2.0161f, -3.0092f, - 7.7435f, 28.2367f, -35.0188f, -0.1578f, 105.0164f, 93.4495f, - 94.9134f, 81.0315f, 4.3602f, 8.1303f, -37.7665f, -16.6986f, - -40.8902f, 8.2542f, -33.3215f, -2.0457f, -69.0245f, 4.1016f, - 47.2770f, -25.8268f, -23.6034f, -26.4339f, -27.8305f, 8.4468f, - 13.8742f, 8.3874f, 4.2044f, 1.4619f, -40.2909f, -0.6358f, - -0.7982f, 36.1931f, -17.3147f, -0.3348f, 106.8135f, 96.5298f, - 97.8829f, 86.9994f, -25.8170f, 15.0652f, -0.9181f, 85.8544f, - 2.5475f, 9.8009f, -3.5931f, 89.2017f, -3.7252f, -25.2986f, - 22.5505f, 
14.0434f, 7.0708f, 4.6646f, 1.5807f, -39.4024f, - -0.1436f, 0.0256f, 0.0274f, -0.2126f, 0.0401f, 0.0745f, - -0.0379f, -0.0357f, 0.0777f, -0.0709f, -0.1093f, -0.2047f, - -0.0713f, -0.0478f, -0.0908f, 0.1963f, 0.1282f, 0.0977f, - 0.1304f, 0.2058f, 0.0700f, 0.0518f, 0.0239f, 0.0686f, - -0.1909f, 0.0828f, -0.1243f, -0.1920f, 0.1908f, -0.0808f, - 90.8028f, 89.2894f, 84.5339f, 83.3491f, -13.3838f, 12.0240f, - -3.9443f, 63.0867f, -2.5321f, -0.0099f, 68.9140f, 66.3206f, - 66.0278f, 63.1498f, -83.7261f, 74.3448f, 73.4998f, 64.8477f, - 69.7701f, 74.5878f, 71.0331f, 63.2116f, 74.3162f, -83.9282f, - 20.8163f, 89.6818f, 88.6452f, 83.7338f, 82.9360f, -13.2357f, - 0.1299f, -0.1765f, -0.0168f, -0.1372f, -0.1183f, 0.0472f, - 0.1312f, 0.0267f, 0.0194f, -0.1593f, 0.0059f, 0.1775f, - 0.0668f, -0.1239f, -0.1982f, -0.1415f, -0.1659f, -0.1148f, - 0.0136f, 0.0913f, -0.1254f, -0.0357f, 0.0892f, 0.0835f, - -0.0554f, 0.1969f, -0.0888f, -0.0623f, -0.0236f, -0.1492f, - 0.4196f, 0.3218f, 0.2287f, 0.5095f, 0.7210f, 0.2279f, - 0.4523f, -0.1832f, 1.3095f, -0.2041f, -2.1443f, -2.1947f, - -1.9292f, -2.1142f, 0.5840f, 0.1018f, 0.1011f, -1.6565f, - 0.4325f, 0.0424f, 0.2836f, -1.7183f, 0.2595f, 0.2686f, - -1.8784f, 0.3891f, 0.3050f, 0.6195f, 0.2896f, 0.5905f, - -5.3024f, -3.2518f, -12.5192f, -29.1732f, 1.6538f, -1.8315f, - 9.9788f, 10.5155f, 6.3234f, -0.3460f, 76.9925f, 51.3785f, - 55.7120f, 29.0432f, 5.5901f, 25.6578f, -3.9565f, 13.0509f, - -106.0371f, 23.2124f, -18.2004f, 8.4618f, -69.3585f, 5.5651f, - 80.0565f, -6.4941f, -5.3742f, -14.4209f, -24.1565f, 6.6801f, - -22.0585f, -20.9909f, -26.7939f, -29.6890f, -14.5085f, 2.1866f, - -4.2608f, 17.3977f, -30.8824f, -0.4017f, 135.6957f, 126.9320f, - 127.0044f, 118.1835f, -1.8768f, -0.8629f, -32.0882f, 44.7862f, - -23.9174f, 1.6485f, -27.9940f, 51.9078f, -48.5279f, -1.7550f, - 49.9230f, -19.9785f, -22.4647f, -27.6911f, -27.3197f, -10.6545f, - -0.1922f, -0.1999f, -0.1396f, 0.1065f, 0.0085f, -0.1940f, - 0.0351f, 0.1285f, -0.0292f, -0.1296f, 0.1543f, -0.2082f, 
- -0.1758f, 0.0719f, 0.0764f, 0.1394f, -0.0255f, -0.0370f, - 0.1615f, -0.0568f, 0.1920f, -0.1631f, 0.0199f, 0.1884f, - 0.0693f, 0.1074f, -0.0273f, 0.1540f, 0.0098f, 0.2111f, - 0.1805f, -0.0555f, 0.1159f, 0.0469f, 0.1789f, -0.1711f, - -0.1304f, 0.1912f, -0.0737f, -0.1408f, 0.1804f, -0.2023f, - -0.0467f, -0.1019f, -0.0136f, 0.0691f, 0.1454f, -0.0213f, - 0.0929f, -0.0958f, 0.1299f, 0.1137f, 0.1175f, 0.1042f, - -0.2081f, -0.0737f, 0.0582f, 0.1640f, 0.2120f, -0.0646f, - -0.0326f, 0.1976f, 0.1182f, -0.1365f, -0.1784f, 0.2113f, - 0.0469f, 0.0763f, -0.0197f, -0.1902f, 0.1259f, 0.1598f, - -0.0180f, -0.1339f, -0.1675f, -0.1884f, -0.1973f, 0.1529f, - 0.1160f, 0.2154f, -0.1446f, -0.1395f, 0.0355f, 0.1513f, - -0.2086f, -0.1135f, -0.1502f, -0.0018f, 0.0486f, -0.0110f, - -0.0843f, -0.0716f, -0.1367f, 0.0753f, 0.0114f, 0.0475f, - -0.0632f, 0.2045f, -0.0512f, -0.0906f, -0.1071f, -0.1957f, - 0.1361f, 0.1821f, -0.1684f, -0.1383f, 0.1059f, 0.1579f, - -0.0064f, -0.1205f, -0.0718f, -0.1323f, -0.0174f, -0.1092f, - -0.1915f, 0.1978f, -0.1245f, 0.1297f, -0.1542f, 0.1556f, - -0.1752f, 0.0718f, -0.1020f, -0.1970f, 0.0518f, -0.0888f, - 0.0541f, -0.1922f, -0.1467f, -0.0653f, -0.1940f, -0.0800f, - -0.1096f, -0.0796f, -0.1310f, 0.0191f, -0.1077f, -0.0973f, - 0.1566f, 0.0074f, 0.0500f, -0.0415f, -0.2116f, 0.0227f, - 0.0895f, 0.1528f, 0.1404f, 0.0467f, 0.0462f, -0.0973f, - -0.1669f, 0.0551f, 0.1167f, -0.1470f, -0.0542f, -0.1006f, - 0.2104f, 0.1039f, -0.0211f, -0.1726f, -0.0694f, -0.0270f, - 0.0277f, -0.0715f, -0.2055f, -0.1502f, -0.1718f, -0.0043f, - 0.0174f, 0.1019f, -0.0233f, -0.1518f, -0.1331f, -0.0001f, - -0.1483f, -0.2115f, 0.0666f, 0.0014f, 0.1601f, -0.0690f, - }; - -static const float av1_rdcost_model_nn_biases_layer0[NUM_HIDDEN_NODES] = { - 0.156824f, 0.f, 0.130013f, 0.084482f, -129.058197f, -15.090252f, - -3.859116f, 0.736356f, -81.361557f, -0.001922f, -0.000713f, 0.440181f, - 14.982646f, 1.282223f, 2.23122f, 94.26635f, 93.920929f, 0.614672f, - 0.f, 0.315858f, 4.746014f, 0.116901f, 
-35.661354f, -75.148285f, - 92.006989f, -14.112332f, 86.673157f, -0.000307f, -0.000544f, 0.f, - -7.851313f, 0.505186f, 0.f, 0.f, -111.681091f, -0.937782f, - 0.035789f, 0.f, 0.f, -0.00102f, -75.180527f, 0.f, - -63.821148f, 79.592392f, 0.085068f, 11.184906f, 1.25406f, 0.f, - -29.779242f, -0.181732f, 0.f, 0.425554f, -90.78405f, 0.f, - -0.828326f, -81.132179f, 0.f, -2.757063f, 0.f, 0.f, - 2.967951f, -4.440599f, 0.f, -5.105355f, 14.734543f, 0.f, - 0.f, 0.f, 0.f, 0.295342f, -0.026907f, 133.375412f, - -0.000855f, 0.f, -0.875029f, 15.665165f, 0.437296f, 0.321257f, - -0.001932f, -4.235782f, -87.187782f, 0.f, -28.84696f, 7.055514f, - 0.f, 95.548302f, -0.000425f, 0.38969f, -13.88008f, -27.347931f, - 0.f, 0.f, 0.f, -0.000026f, 0.f, 0.f, -}; - -static const float - av1_rdcost_model_nn_weights_layer1[NUM_HIDDEN_NODES * NUM_OUTPUTS] = { - -0.101706f, -0.14411f, -0.139118f, -0.132945f, 118.811302f, - 3.137232f, -32.969776f, -4.150725f, 26.263071f, 0.092841f, - 0.174125f, -0.028195f, 15.712872f, 17.722702f, 5.666006f, - -121.143929f, -131.933731f, -3.000318f, -0.032063f, -0.380065f, - -1.660653f, -0.164802f, 7.177527f, 87.759155f, -119.564224f, - -98.051651f, -110.581116f, -0.069982f, 0.023906f, 0.183792f, - 40.606274f, -0.080804f, -0.053744f, -0.187848f, 157.44313f, - -4.820149f, 0.089499f, 0.070232f, -0.043038f, 0.072996f, - 93.347313f, 0.225259f, 103.223228f, -110.682541f, 0.14314f, - -89.827538f, 6.505952f, -0.076949f, 73.816132f, -0.063416f, - -0.23736f, -0.066059f, 116.049599f, 0.120871f, -4.708246f, - 107.501671f, -0.206708f, -32.688675f, 0.047608f, -0.105907f, - 6.505825f, -75.461891f, -0.160341f, 6.532121f, -84.868111f, - -0.065622f, 0.044756f, 0.008672f, 0.017155f, 0.046108f, - -0.218818f, -126.507957f, 0.028271f, 0.180625f, -4.707376f, - -121.524307f, -0.03853f, -4.103166f, -0.018947f, -95.768463f, - 15.941695f, 0.147154f, -102.863029f, -72.521698f, -0.037133f, - -138.1492f, 0.210016f, -0.084692f, -68.693665f, -52.523472f, - -0.133385f, -0.17438f, 0.008654f, -0.035642f, 
-0.145202f, - 0.211135f, - }; - -static const float av1_rdcost_model_nn_biases_layer1[NUM_OUTPUTS] = { - 0.251909f -}; - -static const NN_CONFIG av1_rdcost_model_nnconfig = { - NUM_FEATURES, - NUM_OUTPUTS, - NUM_HIDDEN_LAYERS, - { - NUM_HIDDEN_NODES, - }, - { - av1_rdcost_model_nn_weights_layer0, - av1_rdcost_model_nn_weights_layer1, - }, - { - av1_rdcost_model_nn_biases_layer0, - av1_rdcost_model_nn_biases_layer1, - }, -}; - -//------------------------------------------------------------------------------ - -#undef NUM_FEATURES -#undef NUM_HIDDEN_LAYERS -#undef NUM_HIDDEN_NODES -#undef NUM_OUTPUTS - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_RATE_DISTORTION_MODEL_PARAMS_H_ diff --git a/third_party/aom/av1/encoder/ratectrl.c b/third_party/aom/av1/encoder/ratectrl.c deleted file mode 100644 index 2597fb990..000000000 --- a/third_party/aom/av1/encoder/ratectrl.c +++ /dev/null @@ -1,1776 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include -#include -#include -#include -#include -#include - -#include "aom_dsp/aom_dsp_common.h" -#include "aom_mem/aom_mem.h" -#include "aom_ports/mem.h" -#include "aom_ports/system_state.h" - -#include "av1/common/alloccommon.h" -#include "av1/encoder/aq_cyclicrefresh.h" -#include "av1/common/common.h" -#include "av1/common/entropymode.h" -#include "av1/common/quant_common.h" -#include "av1/common/seg_common.h" - -#include "av1/encoder/encodemv.h" -#include "av1/encoder/random.h" -#include "av1/encoder/ratectrl.h" - -// Max rate target for 1080P and below encodes under normal circumstances -// (1920 * 1080 / (16 * 16)) * MAX_MB_RATE bits per MB -#define MAX_MB_RATE 250 -#define MAXRATE_1080P 2025000 - -#define DEFAULT_KF_BOOST 2000 -#define DEFAULT_GF_BOOST 2000 - -#define MIN_BPB_FACTOR 0.005 -#define MAX_BPB_FACTOR 50 - -#define FRAME_OVERHEAD_BITS 200 -#define ASSIGN_MINQ_TABLE(bit_depth, name) \ - do { \ - switch (bit_depth) { \ - case AOM_BITS_8: name = name##_8; break; \ - case AOM_BITS_10: name = name##_10; break; \ - case AOM_BITS_12: name = name##_12; break; \ - default: \ - assert(0 && \ - "bit_depth should be AOM_BITS_8, AOM_BITS_10" \ - " or AOM_BITS_12"); \ - name = NULL; \ - } \ - } while (0) - -// Tables relating active max Q to active min Q -static int kf_low_motion_minq_8[QINDEX_RANGE]; -static int kf_high_motion_minq_8[QINDEX_RANGE]; -static int arfgf_low_motion_minq_8[QINDEX_RANGE]; -static int arfgf_high_motion_minq_8[QINDEX_RANGE]; -static int inter_minq_8[QINDEX_RANGE]; -static int rtc_minq_8[QINDEX_RANGE]; - -static int kf_low_motion_minq_10[QINDEX_RANGE]; -static int kf_high_motion_minq_10[QINDEX_RANGE]; -static int arfgf_low_motion_minq_10[QINDEX_RANGE]; -static int arfgf_high_motion_minq_10[QINDEX_RANGE]; -static int inter_minq_10[QINDEX_RANGE]; -static int rtc_minq_10[QINDEX_RANGE]; -static int kf_low_motion_minq_12[QINDEX_RANGE]; -static int kf_high_motion_minq_12[QINDEX_RANGE]; -static int 
arfgf_low_motion_minq_12[QINDEX_RANGE]; -static int arfgf_high_motion_minq_12[QINDEX_RANGE]; -static int inter_minq_12[QINDEX_RANGE]; -static int rtc_minq_12[QINDEX_RANGE]; - -static int gf_high = 2000; -static int gf_low = 400; -static int kf_high = 5000; -static int kf_low = 400; - -// How many times less pixels there are to encode given the current scaling. -// Temporary replacement for rcf_mult and rate_thresh_mult. -static double resize_rate_factor(const AV1_COMP *cpi, int width, int height) { - return (double)(cpi->oxcf.width * cpi->oxcf.height) / (width * height); -} - -// Functions to compute the active minq lookup table entries based on a -// formulaic approach to facilitate easier adjustment of the Q tables. -// The formulae were derived from computing a 3rd order polynomial best -// fit to the original data (after plotting real maxq vs minq (not q index)) -static int get_minq_index(double maxq, double x3, double x2, double x1, - aom_bit_depth_t bit_depth) { - int i; - const double minqtarget = AOMMIN(((x3 * maxq + x2) * maxq + x1) * maxq, maxq); - - // Special case handling to deal with the step from q2.0 - // down to lossless mode represented by q 1.0. 
- if (minqtarget <= 2.0) return 0; - - for (i = 0; i < QINDEX_RANGE; i++) { - if (minqtarget <= av1_convert_qindex_to_q(i, bit_depth)) return i; - } - - return QINDEX_RANGE - 1; -} - -static void init_minq_luts(int *kf_low_m, int *kf_high_m, int *arfgf_low, - int *arfgf_high, int *inter, int *rtc, - aom_bit_depth_t bit_depth) { - int i; - for (i = 0; i < QINDEX_RANGE; i++) { - const double maxq = av1_convert_qindex_to_q(i, bit_depth); - kf_low_m[i] = get_minq_index(maxq, 0.000001, -0.0004, 0.150, bit_depth); - kf_high_m[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.45, bit_depth); - arfgf_low[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.30, bit_depth); - arfgf_high[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.55, bit_depth); - inter[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.90, bit_depth); - rtc[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.70, bit_depth); - } -} - -void av1_rc_init_minq_luts(void) { - init_minq_luts(kf_low_motion_minq_8, kf_high_motion_minq_8, - arfgf_low_motion_minq_8, arfgf_high_motion_minq_8, - inter_minq_8, rtc_minq_8, AOM_BITS_8); - init_minq_luts(kf_low_motion_minq_10, kf_high_motion_minq_10, - arfgf_low_motion_minq_10, arfgf_high_motion_minq_10, - inter_minq_10, rtc_minq_10, AOM_BITS_10); - init_minq_luts(kf_low_motion_minq_12, kf_high_motion_minq_12, - arfgf_low_motion_minq_12, arfgf_high_motion_minq_12, - inter_minq_12, rtc_minq_12, AOM_BITS_12); -} - -// These functions use formulaic calculations to make playing with the -// quantizer tables easier. 
If necessary they can be replaced by lookup -// tables if and when things settle down in the experimental bitstream -double av1_convert_qindex_to_q(int qindex, aom_bit_depth_t bit_depth) { - // Convert the index to a real Q value (scaled down to match old Q values) - switch (bit_depth) { - case AOM_BITS_8: return av1_ac_quant_Q3(qindex, 0, bit_depth) / 4.0; - case AOM_BITS_10: return av1_ac_quant_Q3(qindex, 0, bit_depth) / 16.0; - case AOM_BITS_12: return av1_ac_quant_Q3(qindex, 0, bit_depth) / 64.0; - default: - assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12"); - return -1.0; - } -} - -int av1_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex, - double correction_factor, aom_bit_depth_t bit_depth) { - const double q = av1_convert_qindex_to_q(qindex, bit_depth); - int enumerator = frame_type == KEY_FRAME ? 2700000 : 1800000; - - assert(correction_factor <= MAX_BPB_FACTOR && - correction_factor >= MIN_BPB_FACTOR); - - // q based adjustment to baseline enumerator - enumerator += (int)(enumerator * q) >> 12; - return (int)(enumerator * correction_factor / q); -} - -int av1_estimate_bits_at_q(FRAME_TYPE frame_type, int q, int mbs, - double correction_factor, - aom_bit_depth_t bit_depth) { - const int bpm = - (int)(av1_rc_bits_per_mb(frame_type, q, correction_factor, bit_depth)); - return AOMMAX(FRAME_OVERHEAD_BITS, - (int)((uint64_t)bpm * mbs) >> BPER_MB_NORMBITS); -} - -int av1_rc_clamp_pframe_target_size(const AV1_COMP *const cpi, int target) { - const RATE_CONTROL *rc = &cpi->rc; - const AV1EncoderConfig *oxcf = &cpi->oxcf; - const int min_frame_target = - AOMMAX(rc->min_frame_bandwidth, rc->avg_frame_bandwidth >> 5); - // Clip the frame target to the minimum setup value. - if (cpi->rc.is_src_frame_alt_ref) { - // If there is an active ARF at this location use the minimum - // bits on this frame even if it is a constructed arf. 
- // The active maximum quantizer insures that an appropriate - // number of bits will be spent if needed for constructed ARFs. - target = min_frame_target; - } else if (target < min_frame_target) { - target = min_frame_target; - } - - // Clip the frame target to the maximum allowed value. - if (target > rc->max_frame_bandwidth) target = rc->max_frame_bandwidth; - if (oxcf->rc_max_inter_bitrate_pct) { - const int max_rate = - rc->avg_frame_bandwidth * oxcf->rc_max_inter_bitrate_pct / 100; - target = AOMMIN(target, max_rate); - } - - return target; -} - -int av1_rc_clamp_iframe_target_size(const AV1_COMP *const cpi, int target) { - const RATE_CONTROL *rc = &cpi->rc; - const AV1EncoderConfig *oxcf = &cpi->oxcf; - if (oxcf->rc_max_intra_bitrate_pct) { - const int max_rate = - rc->avg_frame_bandwidth * oxcf->rc_max_intra_bitrate_pct / 100; - target = AOMMIN(target, max_rate); - } - if (target > rc->max_frame_bandwidth) target = rc->max_frame_bandwidth; - return target; -} - -// Update the buffer level: leaky bucket model. -static void update_buffer_level(AV1_COMP *cpi, int encoded_frame_size) { - const AV1_COMMON *const cm = &cpi->common; - RATE_CONTROL *const rc = &cpi->rc; - - // Non-viewable frames are a special case and are treated as pure overhead. - // TODO(zoeliu): To further explore whether we should treat BWDREF_FRAME - // differently, since it is a no-show frame. - if (!cm->show_frame && !rc->is_bwd_ref_frame) - rc->bits_off_target -= encoded_frame_size; - else - rc->bits_off_target += rc->avg_frame_bandwidth - encoded_frame_size; - - // Clip the buffer level to the maximum specified buffer size. 
- rc->bits_off_target = AOMMIN(rc->bits_off_target, rc->maximum_buffer_size); - rc->buffer_level = rc->bits_off_target; -} - -int av1_rc_get_default_min_gf_interval(int width, int height, - double framerate) { - // Assume we do not need any constraint lower than 4K 20 fps - static const double factor_safe = 3840 * 2160 * 20.0; - const double factor = width * height * framerate; - const int default_interval = - clamp((int)(framerate * 0.125), MIN_GF_INTERVAL, MAX_GF_INTERVAL); - - if (factor <= factor_safe) - return default_interval; - else - return AOMMAX(default_interval, - (int)(MIN_GF_INTERVAL * factor / factor_safe + 0.5)); - // Note this logic makes: - // 4K24: 5 - // 4K30: 6 - // 4K60: 12 -} - -int av1_rc_get_default_max_gf_interval(double framerate, int min_gf_interval) { - int interval = AOMMIN(MAX_GF_INTERVAL, (int)(framerate * 0.75)); - interval += (interval & 0x01); // Round to even value -#if CONFIG_FIX_GF_LENGTH - interval = AOMMAX(FIXED_GF_LENGTH, interval); -#endif - return AOMMAX(interval, min_gf_interval); -} - -void av1_rc_init(const AV1EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) { - int i; - - if (pass == 0 && oxcf->rc_mode == AOM_CBR) { - rc->avg_frame_qindex[KEY_FRAME] = oxcf->worst_allowed_q; - rc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q; - } else { - rc->avg_frame_qindex[KEY_FRAME] = - (oxcf->worst_allowed_q + oxcf->best_allowed_q) / 2; - rc->avg_frame_qindex[INTER_FRAME] = - (oxcf->worst_allowed_q + oxcf->best_allowed_q) / 2; - } - - rc->last_q[KEY_FRAME] = oxcf->best_allowed_q; - rc->last_q[INTER_FRAME] = oxcf->worst_allowed_q; - - rc->buffer_level = rc->starting_buffer_level; - rc->bits_off_target = rc->starting_buffer_level; - - rc->rolling_target_bits = rc->avg_frame_bandwidth; - rc->rolling_actual_bits = rc->avg_frame_bandwidth; - rc->long_rolling_target_bits = rc->avg_frame_bandwidth; - rc->long_rolling_actual_bits = rc->avg_frame_bandwidth; - - rc->total_actual_bits = 0; - rc->total_target_bits = 0; - 
rc->total_target_vs_actual = 0; - - rc->frames_since_key = 8; // Sensible default for first frame. - rc->this_key_frame_forced = 0; - rc->next_key_frame_forced = 0; - rc->source_alt_ref_pending = 0; - rc->source_alt_ref_active = 0; - - rc->frames_till_gf_update_due = 0; - rc->ni_av_qi = oxcf->worst_allowed_q; - rc->ni_tot_qi = 0; - rc->ni_frames = 0; - - rc->tot_q = 0.0; - rc->avg_q = av1_convert_qindex_to_q(oxcf->worst_allowed_q, oxcf->bit_depth); - - for (i = 0; i < RATE_FACTOR_LEVELS; ++i) { - rc->rate_correction_factors[i] = 0.7; - } - rc->rate_correction_factors[KF_STD] = 1.0; - rc->min_gf_interval = oxcf->min_gf_interval; - rc->max_gf_interval = oxcf->max_gf_interval; - if (rc->min_gf_interval == 0) - rc->min_gf_interval = av1_rc_get_default_min_gf_interval( - oxcf->width, oxcf->height, oxcf->init_framerate); - if (rc->max_gf_interval == 0) - rc->max_gf_interval = av1_rc_get_default_max_gf_interval( - oxcf->init_framerate, rc->min_gf_interval); - rc->baseline_gf_interval = (rc->min_gf_interval + rc->max_gf_interval) / 2; -} - -int av1_rc_drop_frame(AV1_COMP *cpi) { - const AV1EncoderConfig *oxcf = &cpi->oxcf; - RATE_CONTROL *const rc = &cpi->rc; - - if (!oxcf->drop_frames_water_mark) { - return 0; - } else { - if (rc->buffer_level < 0) { - // Always drop if buffer is below 0. - return 1; - } else { - // If buffer is below drop_mark, for now just drop every other frame - // (starting with the next frame) until it increases back over drop_mark. 
- int drop_mark = - (int)(oxcf->drop_frames_water_mark * rc->optimal_buffer_level / 100); - if ((rc->buffer_level > drop_mark) && (rc->decimation_factor > 0)) { - --rc->decimation_factor; - } else if (rc->buffer_level <= drop_mark && rc->decimation_factor == 0) { - rc->decimation_factor = 1; - } - if (rc->decimation_factor > 0) { - if (rc->decimation_count > 0) { - --rc->decimation_count; - return 1; - } else { - rc->decimation_count = rc->decimation_factor; - return 0; - } - } else { - rc->decimation_count = 0; - return 0; - } - } - } -} - -static double get_rate_correction_factor(const AV1_COMP *cpi, int width, - int height) { - const RATE_CONTROL *const rc = &cpi->rc; - double rcf; - - if (cpi->common.frame_type == KEY_FRAME) { - rcf = rc->rate_correction_factors[KF_STD]; - } else if (cpi->oxcf.pass == 2) { - RATE_FACTOR_LEVEL rf_lvl = - cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index]; - rcf = rc->rate_correction_factors[rf_lvl]; - } else { - if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) && - !rc->is_src_frame_alt_ref && - (cpi->oxcf.rc_mode != AOM_CBR || cpi->oxcf.gf_cbr_boost_pct > 20)) - rcf = rc->rate_correction_factors[GF_ARF_STD]; - else - rcf = rc->rate_correction_factors[INTER_NORMAL]; - } - rcf *= resize_rate_factor(cpi, width, height); - return fclamp(rcf, MIN_BPB_FACTOR, MAX_BPB_FACTOR); -} - -static void set_rate_correction_factor(AV1_COMP *cpi, double factor, int width, - int height) { - RATE_CONTROL *const rc = &cpi->rc; - - // Normalize RCF to account for the size-dependent scaling factor. 
- factor /= resize_rate_factor(cpi, width, height); - - factor = fclamp(factor, MIN_BPB_FACTOR, MAX_BPB_FACTOR); - - if (cpi->common.frame_type == KEY_FRAME) { - rc->rate_correction_factors[KF_STD] = factor; - } else if (cpi->oxcf.pass == 2) { - RATE_FACTOR_LEVEL rf_lvl = - cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index]; - rc->rate_correction_factors[rf_lvl] = factor; - } else { - if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) && - !rc->is_src_frame_alt_ref && - (cpi->oxcf.rc_mode != AOM_CBR || cpi->oxcf.gf_cbr_boost_pct > 20)) - rc->rate_correction_factors[GF_ARF_STD] = factor; - else - rc->rate_correction_factors[INTER_NORMAL] = factor; - } -} - -void av1_rc_update_rate_correction_factors(AV1_COMP *cpi, int width, - int height) { - const AV1_COMMON *const cm = &cpi->common; - int correction_factor = 100; - double rate_correction_factor = - get_rate_correction_factor(cpi, width, height); - double adjustment_limit; - const int MBs = av1_get_MBs(width, height); - - int projected_size_based_on_q = 0; - - // Do not update the rate factors for arf overlay frames. - if (cpi->rc.is_src_frame_alt_ref) return; - - // Clear down mmx registers to allow floating point in what follows - aom_clear_system_state(); - - // Work out how big we would have expected the frame to be at this Q given - // the current correction factor. - // Stay in double to avoid int overflow when values are large - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cpi->common.seg.enabled) { - projected_size_based_on_q = - av1_cyclic_refresh_estimate_bits_at_q(cpi, rate_correction_factor); - } else { - projected_size_based_on_q = av1_estimate_bits_at_q( - cpi->common.frame_type, cm->base_qindex, MBs, rate_correction_factor, - cm->seq_params.bit_depth); - } - // Work out a size correction factor. 
- if (projected_size_based_on_q > FRAME_OVERHEAD_BITS) - correction_factor = (int)((100 * (int64_t)cpi->rc.projected_frame_size) / - projected_size_based_on_q); - - // More heavily damped adjustment used if we have been oscillating either side - // of target. - if (correction_factor > 0) { - adjustment_limit = - 0.25 + 0.5 * AOMMIN(1, fabs(log10(0.01 * correction_factor))); - } else { - adjustment_limit = 0.75; - } - - cpi->rc.q_2_frame = cpi->rc.q_1_frame; - cpi->rc.q_1_frame = cm->base_qindex; - cpi->rc.rc_2_frame = cpi->rc.rc_1_frame; - if (correction_factor > 110) - cpi->rc.rc_1_frame = -1; - else if (correction_factor < 90) - cpi->rc.rc_1_frame = 1; - else - cpi->rc.rc_1_frame = 0; - - if (correction_factor > 102) { - // We are not already at the worst allowable quality - correction_factor = - (int)(100 + ((correction_factor - 100) * adjustment_limit)); - rate_correction_factor = (rate_correction_factor * correction_factor) / 100; - // Keep rate_correction_factor within limits - if (rate_correction_factor > MAX_BPB_FACTOR) - rate_correction_factor = MAX_BPB_FACTOR; - } else if (correction_factor < 99) { - // We are not already at the best allowable quality - correction_factor = - (int)(100 - ((100 - correction_factor) * adjustment_limit)); - rate_correction_factor = (rate_correction_factor * correction_factor) / 100; - - // Keep rate_correction_factor within limits - if (rate_correction_factor < MIN_BPB_FACTOR) - rate_correction_factor = MIN_BPB_FACTOR; - } - - set_rate_correction_factor(cpi, rate_correction_factor, width, height); -} - -int av1_rc_regulate_q(const AV1_COMP *cpi, int target_bits_per_frame, - int active_best_quality, int active_worst_quality, - int width, int height) { - const AV1_COMMON *const cm = &cpi->common; - int q = active_worst_quality; - int last_error = INT_MAX; - int i, target_bits_per_mb, bits_per_mb_at_this_q; - const int MBs = av1_get_MBs(width, height); - const double correction_factor = - get_rate_correction_factor(cpi, width, 
height); - - // Calculate required scaling factor based on target frame size and size of - // frame produced using previous Q. - target_bits_per_mb = - (int)((uint64_t)(target_bits_per_frame) << BPER_MB_NORMBITS) / MBs; - - i = active_best_quality; - - do { - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) { - bits_per_mb_at_this_q = - (int)av1_cyclic_refresh_rc_bits_per_mb(cpi, i, correction_factor); - } else { - bits_per_mb_at_this_q = (int)av1_rc_bits_per_mb( - cm->frame_type, i, correction_factor, cm->seq_params.bit_depth); - } - - if (bits_per_mb_at_this_q <= target_bits_per_mb) { - if ((target_bits_per_mb - bits_per_mb_at_this_q) <= last_error) - q = i; - else - q = i - 1; - - break; - } else { - last_error = bits_per_mb_at_this_q - target_bits_per_mb; - } - } while (++i <= active_worst_quality); - - // In CBR mode, this makes sure q is between oscillating Qs to prevent - // resonance. - if (cpi->oxcf.rc_mode == AOM_CBR && - (cpi->rc.rc_1_frame * cpi->rc.rc_2_frame == -1) && - cpi->rc.q_1_frame != cpi->rc.q_2_frame) { - q = clamp(q, AOMMIN(cpi->rc.q_1_frame, cpi->rc.q_2_frame), - AOMMAX(cpi->rc.q_1_frame, cpi->rc.q_2_frame)); - } - return q; -} - -static int get_active_quality(int q, int gfu_boost, int low, int high, - int *low_motion_minq, int *high_motion_minq) { - if (gfu_boost > high) { - return low_motion_minq[q]; - } else if (gfu_boost < low) { - return high_motion_minq[q]; - } else { - const int gap = high - low; - const int offset = high - gfu_boost; - const int qdiff = high_motion_minq[q] - low_motion_minq[q]; - const int adjustment = ((offset * qdiff) + (gap >> 1)) / gap; - return low_motion_minq[q] + adjustment; - } -} - -static int get_kf_active_quality(const RATE_CONTROL *const rc, int q, - aom_bit_depth_t bit_depth) { - int *kf_low_motion_minq; - int *kf_high_motion_minq; - ASSIGN_MINQ_TABLE(bit_depth, kf_low_motion_minq); - ASSIGN_MINQ_TABLE(bit_depth, kf_high_motion_minq); - return get_active_quality(q, rc->kf_boost, kf_low, 
kf_high, - kf_low_motion_minq, kf_high_motion_minq); -} - -static int get_gf_active_quality(const RATE_CONTROL *const rc, int q, - aom_bit_depth_t bit_depth) { - int *arfgf_low_motion_minq; - int *arfgf_high_motion_minq; - ASSIGN_MINQ_TABLE(bit_depth, arfgf_low_motion_minq); - ASSIGN_MINQ_TABLE(bit_depth, arfgf_high_motion_minq); - return get_active_quality(q, rc->gfu_boost, gf_low, gf_high, - arfgf_low_motion_minq, arfgf_high_motion_minq); -} - -#if REDUCE_LAST_ALT_BOOST -static int get_gf_high_motion_quality(int q, aom_bit_depth_t bit_depth) { - int *arfgf_high_motion_minq; - ASSIGN_MINQ_TABLE(bit_depth, arfgf_high_motion_minq); - return arfgf_high_motion_minq[q]; -} -#endif - -static int calc_active_worst_quality_one_pass_vbr(const AV1_COMP *cpi) { - const RATE_CONTROL *const rc = &cpi->rc; - const unsigned int curr_frame = cpi->common.current_video_frame; - int active_worst_quality; - - if (cpi->common.frame_type == KEY_FRAME) { - active_worst_quality = - curr_frame == 0 ? rc->worst_quality : rc->last_q[KEY_FRAME] * 2; - } else { - if (!rc->is_src_frame_alt_ref && - (cpi->refresh_golden_frame || cpi->refresh_alt2_ref_frame || - cpi->refresh_alt_ref_frame)) { - active_worst_quality = curr_frame == 1 ? rc->last_q[KEY_FRAME] * 5 / 4 - : rc->last_q[INTER_FRAME]; - } else { - active_worst_quality = curr_frame == 1 ? rc->last_q[KEY_FRAME] * 2 - : rc->last_q[INTER_FRAME] * 2; - } - } - return AOMMIN(active_worst_quality, rc->worst_quality); -} - -// Adjust active_worst_quality level based on buffer level. -static int calc_active_worst_quality_one_pass_cbr(const AV1_COMP *cpi) { - // Adjust active_worst_quality: If buffer is above the optimal/target level, - // bring active_worst_quality down depending on fullness of buffer. - // If buffer is below the optimal level, let the active_worst_quality go from - // ambient Q (at buffer = optimal level) to worst_quality level - // (at buffer = critical level). 
- const AV1_COMMON *const cm = &cpi->common; - const RATE_CONTROL *rc = &cpi->rc; - // Buffer level below which we push active_worst to worst_quality. - int64_t critical_level = rc->optimal_buffer_level >> 3; - int64_t buff_lvl_step = 0; - int adjustment = 0; - int active_worst_quality; - int ambient_qp; - if (cm->frame_type == KEY_FRAME) return rc->worst_quality; - // For ambient_qp we use minimum of avg_frame_qindex[KEY_FRAME/INTER_FRAME] - // for the first few frames following key frame. These are both initialized - // to worst_quality and updated with (3/4, 1/4) average in postencode_update. - // So for first few frames following key, the qp of that key frame is weighted - // into the active_worst_quality setting. - ambient_qp = (cm->current_video_frame < 5) - ? AOMMIN(rc->avg_frame_qindex[INTER_FRAME], - rc->avg_frame_qindex[KEY_FRAME]) - : rc->avg_frame_qindex[INTER_FRAME]; - active_worst_quality = AOMMIN(rc->worst_quality, ambient_qp * 5 / 4); - if (rc->buffer_level > rc->optimal_buffer_level) { - // Adjust down. - // Maximum limit for down adjustment, ~30%. - int max_adjustment_down = active_worst_quality / 3; - if (max_adjustment_down) { - buff_lvl_step = ((rc->maximum_buffer_size - rc->optimal_buffer_level) / - max_adjustment_down); - if (buff_lvl_step) - adjustment = (int)((rc->buffer_level - rc->optimal_buffer_level) / - buff_lvl_step); - active_worst_quality -= adjustment; - } - } else if (rc->buffer_level > critical_level) { - // Adjust up from ambient Q. - if (critical_level) { - buff_lvl_step = (rc->optimal_buffer_level - critical_level); - if (buff_lvl_step) { - adjustment = (int)((rc->worst_quality - ambient_qp) * - (rc->optimal_buffer_level - rc->buffer_level) / - buff_lvl_step); - } - active_worst_quality = ambient_qp + adjustment; - } - } else { - // Set to worst_quality if buffer is below critical level. 
- active_worst_quality = rc->worst_quality; - } - return active_worst_quality; -} - -static int rc_pick_q_and_bounds_one_pass_cbr(const AV1_COMP *cpi, int width, - int height, int *bottom_index, - int *top_index) { - const AV1_COMMON *const cm = &cpi->common; - const RATE_CONTROL *const rc = &cpi->rc; - int active_best_quality; - int active_worst_quality = calc_active_worst_quality_one_pass_cbr(cpi); - int q; - int *rtc_minq; - const int bit_depth = cm->seq_params.bit_depth; - ASSIGN_MINQ_TABLE(bit_depth, rtc_minq); - - if (frame_is_intra_only(cm)) { - active_best_quality = rc->best_quality; - // Handle the special case for key frames forced when we have reached - // the maximum key frame interval. Here force the Q to a range - // based on the ambient Q to reduce the risk of popping. - if (rc->this_key_frame_forced) { - int qindex = rc->last_boosted_qindex; - double last_boosted_q = av1_convert_qindex_to_q(qindex, bit_depth); - int delta_qindex = av1_compute_qdelta(rc, last_boosted_q, - (last_boosted_q * 0.75), bit_depth); - active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality); - } else if (cm->current_video_frame > 0) { - // not first frame of one pass and kf_boost is set - double q_adj_factor = 1.0; - double q_val; - - active_best_quality = - get_kf_active_quality(rc, rc->avg_frame_qindex[KEY_FRAME], bit_depth); - - // Allow somewhat lower kf minq with small image formats. - if ((width * height) <= (352 * 288)) { - q_adj_factor -= 0.25; - } - - // Convert the adjustment factor to a qindex delta - // on active_best_quality. - q_val = av1_convert_qindex_to_q(active_best_quality, bit_depth); - active_best_quality += - av1_compute_qdelta(rc, q_val, q_val * q_adj_factor, bit_depth); - } - } else if (!rc->is_src_frame_alt_ref && - (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { - // Use the lower of active_worst_quality and recent - // average Q as basis for GF/ARF best Q limit unless last frame was - // a key frame. 
- if (rc->frames_since_key > 1 && - rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) { - q = rc->avg_frame_qindex[INTER_FRAME]; - } else { - q = active_worst_quality; - } - active_best_quality = get_gf_active_quality(rc, q, bit_depth); - } else { - // Use the lower of active_worst_quality and recent/average Q. - if (cm->current_video_frame > 1) { - if (rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) - active_best_quality = rtc_minq[rc->avg_frame_qindex[INTER_FRAME]]; - else - active_best_quality = rtc_minq[active_worst_quality]; - } else { - if (rc->avg_frame_qindex[KEY_FRAME] < active_worst_quality) - active_best_quality = rtc_minq[rc->avg_frame_qindex[KEY_FRAME]]; - else - active_best_quality = rtc_minq[active_worst_quality]; - } - } - - // Clip the active best and worst quality values to limits - active_best_quality = - clamp(active_best_quality, rc->best_quality, rc->worst_quality); - active_worst_quality = - clamp(active_worst_quality, active_best_quality, rc->worst_quality); - - *top_index = active_worst_quality; - *bottom_index = active_best_quality; - - // Limit Q range for the adaptive loop. 
- if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced && - !(cm->current_video_frame == 0)) { - int qdelta = 0; - aom_clear_system_state(); - qdelta = av1_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, - active_worst_quality, 2.0, bit_depth); - *top_index = active_worst_quality + qdelta; - *top_index = AOMMAX(*top_index, *bottom_index); - } - - // Special case code to try and match quality with forced key frames - if (cm->frame_type == KEY_FRAME && rc->this_key_frame_forced) { - q = rc->last_boosted_qindex; - } else { - q = av1_rc_regulate_q(cpi, rc->this_frame_target, active_best_quality, - active_worst_quality, width, height); - if (q > *top_index) { - // Special case when we are targeting the max allowed rate - if (rc->this_frame_target >= rc->max_frame_bandwidth) - *top_index = q; - else - q = *top_index; - } - } - - assert(*top_index <= rc->worst_quality && *top_index >= rc->best_quality); - assert(*bottom_index <= rc->worst_quality && - *bottom_index >= rc->best_quality); - assert(q <= rc->worst_quality && q >= rc->best_quality); - return q; -} - -static int get_active_cq_level(const RATE_CONTROL *rc, - const AV1EncoderConfig *const oxcf) { - static const double cq_adjust_threshold = 0.1; - int active_cq_level = oxcf->cq_level; - if (oxcf->rc_mode == AOM_CQ && rc->total_target_bits > 0) { - const double x = (double)rc->total_actual_bits / rc->total_target_bits; - if (x < cq_adjust_threshold) { - active_cq_level = (int)(active_cq_level * x / cq_adjust_threshold); - } - } - return active_cq_level; -} - -static int rc_pick_q_and_bounds_one_pass_vbr(const AV1_COMP *cpi, int width, - int height, int *bottom_index, - int *top_index) { - const AV1_COMMON *const cm = &cpi->common; - const RATE_CONTROL *const rc = &cpi->rc; - const AV1EncoderConfig *const oxcf = &cpi->oxcf; - const int cq_level = get_active_cq_level(rc, oxcf); - int active_best_quality; - int active_worst_quality = calc_active_worst_quality_one_pass_vbr(cpi); - int q; - int *inter_minq; - 
const int bit_depth = cm->seq_params.bit_depth; - ASSIGN_MINQ_TABLE(bit_depth, inter_minq); - - if (frame_is_intra_only(cm)) { - if (oxcf->rc_mode == AOM_Q) { - const int qindex = cq_level; - const double q_val = av1_convert_qindex_to_q(qindex, bit_depth); - const int delta_qindex = - av1_compute_qdelta(rc, q_val, q_val * 0.25, bit_depth); - active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality); - } else if (rc->this_key_frame_forced) { - const int qindex = rc->last_boosted_qindex; - const double last_boosted_q = av1_convert_qindex_to_q(qindex, bit_depth); - const int delta_qindex = av1_compute_qdelta( - rc, last_boosted_q, last_boosted_q * 0.75, bit_depth); - active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality); - } else { // not first frame of one pass and kf_boost is set - double q_adj_factor = 1.0; - - active_best_quality = - get_kf_active_quality(rc, rc->avg_frame_qindex[KEY_FRAME], bit_depth); - - // Allow somewhat lower kf minq with small image formats. - if ((width * height) <= (352 * 288)) { - q_adj_factor -= 0.25; - } - - // Convert the adjustment factor to a qindex delta on active_best_quality. - { - const double q_val = - av1_convert_qindex_to_q(active_best_quality, bit_depth); - active_best_quality += - av1_compute_qdelta(rc, q_val, q_val * q_adj_factor, bit_depth); - } - } - } else if (!rc->is_src_frame_alt_ref && - (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { - // Use the lower of active_worst_quality and recent - // average Q as basis for GF/ARF best Q limit unless last frame was - // a key frame. - q = (rc->frames_since_key > 1 && - rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) - ? 
rc->avg_frame_qindex[INTER_FRAME] - : rc->avg_frame_qindex[KEY_FRAME]; - // For constrained quality dont allow Q less than the cq level - if (oxcf->rc_mode == AOM_CQ) { - if (q < cq_level) q = cq_level; - active_best_quality = get_gf_active_quality(rc, q, bit_depth); - // Constrained quality use slightly lower active best. - active_best_quality = active_best_quality * 15 / 16; - } else if (oxcf->rc_mode == AOM_Q) { - const int qindex = cq_level; - const double q_val = av1_convert_qindex_to_q(qindex, bit_depth); - const int delta_qindex = - (cpi->refresh_alt_ref_frame) - ? av1_compute_qdelta(rc, q_val, q_val * 0.40, bit_depth) - : av1_compute_qdelta(rc, q_val, q_val * 0.50, bit_depth); - active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality); - } else { - active_best_quality = get_gf_active_quality(rc, q, bit_depth); - } - } else { - if (oxcf->rc_mode == AOM_Q) { - const int qindex = cq_level; - const double q_val = av1_convert_qindex_to_q(qindex, bit_depth); - const double delta_rate[FIXED_GF_INTERVAL] = { 0.50, 1.0, 0.85, 1.0, - 0.70, 1.0, 0.85, 1.0 }; - const int delta_qindex = av1_compute_qdelta( - rc, q_val, - q_val * delta_rate[cm->current_video_frame % FIXED_GF_INTERVAL], - bit_depth); - active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality); - } else { - // Use the lower of active_worst_quality and recent/average Q. - active_best_quality = (cm->current_video_frame > 1) - ? inter_minq[rc->avg_frame_qindex[INTER_FRAME]] - : inter_minq[rc->avg_frame_qindex[KEY_FRAME]]; - // For the constrained quality mode we don't want - // q to fall below the cq level. 
- if ((oxcf->rc_mode == AOM_CQ) && (active_best_quality < cq_level)) { - active_best_quality = cq_level; - } - } - } - - // Clip the active best and worst quality values to limits - active_best_quality = - clamp(active_best_quality, rc->best_quality, rc->worst_quality); - active_worst_quality = - clamp(active_worst_quality, active_best_quality, rc->worst_quality); - - *top_index = active_worst_quality; - *bottom_index = active_best_quality; - - // Limit Q range for the adaptive loop. - { - int qdelta = 0; - aom_clear_system_state(); - if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced && - !(cm->current_video_frame == 0)) { - qdelta = av1_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, - active_worst_quality, 2.0, bit_depth); - } else if (!rc->is_src_frame_alt_ref && - (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { - qdelta = av1_compute_qdelta_by_rate( - &cpi->rc, cm->frame_type, active_worst_quality, 1.75, bit_depth); - } - *top_index = active_worst_quality + qdelta; - *top_index = AOMMAX(*top_index, *bottom_index); - } - - if (oxcf->rc_mode == AOM_Q) { - q = active_best_quality; - // Special case code to try and match quality with forced key frames - } else if ((cm->frame_type == KEY_FRAME) && rc->this_key_frame_forced) { - q = rc->last_boosted_qindex; - } else { - q = av1_rc_regulate_q(cpi, rc->this_frame_target, active_best_quality, - active_worst_quality, width, height); - if (q > *top_index) { - // Special case when we are targeting the max allowed rate - if (rc->this_frame_target >= rc->max_frame_bandwidth) - *top_index = q; - else - q = *top_index; - } - } - - assert(*top_index <= rc->worst_quality && *top_index >= rc->best_quality); - assert(*bottom_index <= rc->worst_quality && - *bottom_index >= rc->best_quality); - assert(q <= rc->worst_quality && q >= rc->best_quality); - return q; -} - -int av1_frame_type_qdelta(const AV1_COMP *cpi, int rf_level, int q) { - static const FRAME_TYPE frame_type[RATE_FACTOR_LEVELS] = { - 
INTER_FRAME, INTER_FRAME, INTER_FRAME, INTER_FRAME, INTER_FRAME, KEY_FRAME - }; - const AV1_COMMON *const cm = &cpi->common; - int qdelta = av1_compute_qdelta_by_rate(&cpi->rc, frame_type[rf_level], q, - rate_factor_deltas[rf_level], - cm->seq_params.bit_depth); - return qdelta; -} - -#define STATIC_MOTION_THRESH 95 -static int rc_pick_q_and_bounds_two_pass(const AV1_COMP *cpi, int width, - int height, int *bottom_index, - int *top_index, int *arf_q) { - const AV1_COMMON *const cm = &cpi->common; - const RATE_CONTROL *const rc = &cpi->rc; - const AV1EncoderConfig *const oxcf = &cpi->oxcf; - const GF_GROUP *gf_group = &cpi->twopass.gf_group; - const int cq_level = get_active_cq_level(rc, oxcf); - int active_best_quality; - int active_worst_quality = cpi->twopass.active_worst_quality; - int q; - int *inter_minq; - const int bit_depth = cm->seq_params.bit_depth; - ASSIGN_MINQ_TABLE(bit_depth, inter_minq); - -#if CUSTOMIZED_GF - const int is_intrl_arf_boost = - gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE; -#else - const int is_intrl_arf_boost = cpi->refresh_alt2_ref_frame; -#endif // CUSTOMIZED_GF - - if (frame_is_intra_only(cm)) { - // Handle the special case for key frames forced when we have reached - // the maximum key frame interval. Here force the Q to a range - // based on the ambient Q to reduce the risk of popping. 
- if (rc->this_key_frame_forced) { - double last_boosted_q; - int delta_qindex; - int qindex; - - if (cpi->twopass.last_kfgroup_zeromotion_pct >= STATIC_MOTION_THRESH) { - qindex = AOMMIN(rc->last_kf_qindex, rc->last_boosted_qindex); - active_best_quality = qindex; - last_boosted_q = av1_convert_qindex_to_q(qindex, bit_depth); - delta_qindex = av1_compute_qdelta(rc, last_boosted_q, - last_boosted_q * 1.25, bit_depth); - active_worst_quality = - AOMMIN(qindex + delta_qindex, active_worst_quality); - } else { - qindex = rc->last_boosted_qindex; - last_boosted_q = av1_convert_qindex_to_q(qindex, bit_depth); - delta_qindex = av1_compute_qdelta(rc, last_boosted_q, - last_boosted_q * 0.5, bit_depth); - active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality); - } - } else { - // Not forced keyframe. - double q_adj_factor = 1.0; - double q_val; - - // Baseline value derived from cpi->active_worst_quality and kf boost. - active_best_quality = - get_kf_active_quality(rc, active_worst_quality, bit_depth); - - // Allow somewhat lower kf minq with small image formats. - if ((width * height) <= (352 * 288)) { - q_adj_factor -= 0.25; - } - - // Make a further adjustment based on the kf zero motion measure. - q_adj_factor += 0.05 - (0.001 * (double)cpi->twopass.kf_zeromotion_pct); - - // Convert the adjustment factor to a qindex delta - // on active_best_quality. - q_val = av1_convert_qindex_to_q(active_best_quality, bit_depth); - active_best_quality += - av1_compute_qdelta(rc, q_val, q_val * q_adj_factor, bit_depth); - } - } else if (!rc->is_src_frame_alt_ref && - (cpi->refresh_golden_frame || is_intrl_arf_boost || - cpi->refresh_alt_ref_frame)) { - // Use the lower of active_worst_quality and recent - // average Q as basis for GF/ARF best Q limit unless last frame was - // a key frame. 
- if (rc->frames_since_key > 1 && - rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) { - q = rc->avg_frame_qindex[INTER_FRAME]; - } else { - q = active_worst_quality; - } - // For constrained quality dont allow Q less than the cq level - if (oxcf->rc_mode == AOM_CQ) { - if (q < cq_level) q = cq_level; -#if USE_SYMM_MULTI_LAYER && MULTI_LVL_BOOST_VBR_CQ - if (gf_group->update_type[gf_group->index] == ARF_UPDATE || - (is_intrl_arf_boost && !cpi->new_bwdref_update_rule)) { -#endif // USE_SYMM_MULTI_LAYER && MULTI_LVL_BOOST_VBR_CQ - active_best_quality = get_gf_active_quality(rc, q, bit_depth); - - // Constrained quality use slightly lower active best. - active_best_quality = active_best_quality * 15 / 16; -#if REDUCE_LAST_ALT_BOOST - if (gf_group->update_type[gf_group->index] == ARF_UPDATE) { - const int min_boost = get_gf_high_motion_quality(q, bit_depth); - const int boost = min_boost - active_best_quality; - - active_best_quality = min_boost - (int)(boost * rc->arf_boost_factor); - } -#endif - *arf_q = active_best_quality; -#if USE_SYMM_MULTI_LAYER && MULTI_LVL_BOOST_VBR_CQ - } else { - active_best_quality = rc->arf_q; - int this_height = gf_group->pyramid_level[gf_group->index]; - while (this_height < gf_group->pyramid_height) { - active_best_quality = (active_best_quality + cq_level + 1) / 2; - ++this_height; - } - } -#endif // USE_SYMM_MULTI_LAYER && MULTI_LVL_BOOST_VBR_CQ - } else if (oxcf->rc_mode == AOM_Q) { - if (!cpi->refresh_alt_ref_frame && !is_intrl_arf_boost) { - active_best_quality = cq_level; - } else { - if (gf_group->update_type[gf_group->index] == ARF_UPDATE) { - active_best_quality = get_gf_active_quality(rc, q, bit_depth); - *arf_q = active_best_quality; -#if REDUCE_LAST_ALT_BOOST - const int min_boost = get_gf_high_motion_quality(q, bit_depth); - const int boost = min_boost - active_best_quality; - - active_best_quality = min_boost - (int)(boost * rc->arf_boost_factor); -#endif - } else { - active_best_quality = rc->arf_q; - } -#if 
USE_SYMM_MULTI_LAYER - if (cpi->new_bwdref_update_rule && is_intrl_arf_boost) { - int this_height = gf_group->pyramid_level[gf_group->index]; - while (this_height < gf_group->pyramid_height) { - active_best_quality = (active_best_quality + cq_level + 1) / 2; - ++this_height; - } - } else { -#endif - // Modify best quality for second level arfs. For mode AOM_Q this - // becomes the baseline frame q. - if (gf_group->rf_level[gf_group->index] == GF_ARF_LOW) - active_best_quality = (active_best_quality + cq_level + 1) / 2; -#if USE_SYMM_MULTI_LAYER - } -#endif - } - } else { - active_best_quality = get_gf_active_quality(rc, q, bit_depth); -#if REDUCE_LAST_ALT_BOOST - const int min_boost = get_gf_high_motion_quality(q, bit_depth); - const int boost = min_boost - active_best_quality; - - active_best_quality = min_boost - (int)(boost * rc->arf_boost_factor); -#endif -#if USE_SYMM_MULTI_LAYER - if (cpi->new_bwdref_update_rule && is_intrl_arf_boost) { - int this_height = gf_group->pyramid_level[gf_group->index]; - while (this_height < gf_group->pyramid_height) { - active_best_quality = - (active_best_quality + active_worst_quality + 1) / 2; - ++this_height; - } - } -#endif - } - } else { - if (oxcf->rc_mode == AOM_Q) { - active_best_quality = cq_level; - } else { - active_best_quality = inter_minq[active_worst_quality]; - - // For the constrained quality mode we don't want - // q to fall below the cq level. - if ((oxcf->rc_mode == AOM_CQ) && (active_best_quality < cq_level)) { - active_best_quality = cq_level; - } - } - } - - // Extension to max or min Q if undershoot or overshoot is outside - // the permitted range. 
- if ((cpi->oxcf.rc_mode != AOM_Q) && - (cpi->twopass.gf_zeromotion_pct < VLOW_MOTION_THRESHOLD)) { - if (frame_is_intra_only(cm) || - (!rc->is_src_frame_alt_ref && - (cpi->refresh_golden_frame || is_intrl_arf_boost || - cpi->refresh_alt_ref_frame))) { - active_best_quality -= - (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast); - active_worst_quality += (cpi->twopass.extend_maxq / 2); - } else { - active_best_quality -= - (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast) / 2; - active_worst_quality += cpi->twopass.extend_maxq; - } - } - - aom_clear_system_state(); - // Static forced key frames Q restrictions dealt with elsewhere. - if (!(frame_is_intra_only(cm)) || !rc->this_key_frame_forced || - (cpi->twopass.last_kfgroup_zeromotion_pct < STATIC_MOTION_THRESH)) { - int qdelta = av1_frame_type_qdelta(cpi, gf_group->rf_level[gf_group->index], - active_worst_quality); - active_worst_quality = - AOMMAX(active_worst_quality + qdelta, active_best_quality); - } - - // Modify active_best_quality for downscaled normal frames. - if (av1_frame_scaled(cm) && !frame_is_kf_gf_arf(cpi)) { - int qdelta = av1_compute_qdelta_by_rate( - rc, cm->frame_type, active_best_quality, 2.0, bit_depth); - active_best_quality = - AOMMAX(active_best_quality + qdelta, rc->best_quality); - } - - active_best_quality = - clamp(active_best_quality, rc->best_quality, rc->worst_quality); - active_worst_quality = - clamp(active_worst_quality, active_best_quality, rc->worst_quality); - - if (oxcf->rc_mode == AOM_Q) { - q = active_best_quality; - // Special case code to try and match quality with forced key frames. - } else if (frame_is_intra_only(cm) && rc->this_key_frame_forced) { - // If static since last kf use better of last boosted and last kf q. 
- if (cpi->twopass.last_kfgroup_zeromotion_pct >= STATIC_MOTION_THRESH) { - q = AOMMIN(rc->last_kf_qindex, rc->last_boosted_qindex); - } else { - q = AOMMIN(rc->last_boosted_qindex, - (active_best_quality + active_worst_quality) / 2); - } - } else { - q = av1_rc_regulate_q(cpi, rc->this_frame_target, active_best_quality, - active_worst_quality, width, height); - if (q > active_worst_quality) { - // Special case when we are targeting the max allowed rate. - if (rc->this_frame_target >= rc->max_frame_bandwidth) - active_worst_quality = q; - else - q = active_worst_quality; - } - } - clamp(q, active_best_quality, active_worst_quality); - - *top_index = active_worst_quality; - *bottom_index = active_best_quality; - - assert(*top_index <= rc->worst_quality && *top_index >= rc->best_quality); - assert(*bottom_index <= rc->worst_quality && - *bottom_index >= rc->best_quality); - assert(q <= rc->worst_quality && q >= rc->best_quality); - return q; -} - -int av1_rc_pick_q_and_bounds(AV1_COMP *cpi, int width, int height, - int *bottom_index, int *top_index) { - int q; - if (cpi->oxcf.pass == 0) { - if (cpi->oxcf.rc_mode == AOM_CBR) - q = rc_pick_q_and_bounds_one_pass_cbr(cpi, width, height, bottom_index, - top_index); - else - q = rc_pick_q_and_bounds_one_pass_vbr(cpi, width, height, bottom_index, - top_index); - } else { - assert(cpi->oxcf.pass == 2 && "invalid encode pass"); - - GF_GROUP *gf_group = &cpi->twopass.gf_group; - int arf_q = 0; - - q = rc_pick_q_and_bounds_two_pass(cpi, width, height, bottom_index, - top_index, &arf_q); - - if (gf_group->update_type[gf_group->index] == ARF_UPDATE) { - cpi->rc.arf_q = arf_q; - } - } - - return q; -} - -void av1_rc_compute_frame_size_bounds(const AV1_COMP *cpi, int frame_target, - int *frame_under_shoot_limit, - int *frame_over_shoot_limit) { - if (cpi->oxcf.rc_mode == AOM_Q) { - *frame_under_shoot_limit = 0; - *frame_over_shoot_limit = INT_MAX; - } else { - // For very small rate targets where the fractional adjustment - // may 
be tiny make sure there is at least a minimum range. - const int tolerance = (cpi->sf.recode_tolerance * frame_target) / 100; - *frame_under_shoot_limit = AOMMAX(frame_target - tolerance - 200, 0); - *frame_over_shoot_limit = - AOMMIN(frame_target + tolerance + 200, cpi->rc.max_frame_bandwidth); - } -} - -static void rc_set_frame_target(AV1_COMP *cpi, int target, int width, - int height) { - const AV1_COMMON *const cm = &cpi->common; - RATE_CONTROL *const rc = &cpi->rc; - - rc->this_frame_target = target; - - // Modify frame size target when down-scaled. - if (av1_frame_scaled(cm)) - rc->this_frame_target = - (int)(rc->this_frame_target * resize_rate_factor(cpi, width, height)); - - // Target rate per SB64 (including partial SB64s. - rc->sb64_target_rate = - (int)((int64_t)rc->this_frame_target * 64 * 64) / (width * height); -} - -static void update_alt_ref_frame_stats(AV1_COMP *cpi) { - // this frame refreshes means next frames don't unless specified by user - RATE_CONTROL *const rc = &cpi->rc; - rc->frames_since_golden = 0; - - // Mark the alt ref as done (setting to 0 means no further alt refs pending). - rc->source_alt_ref_pending = 0; - - // Set the alternate reference frame active flag - rc->source_alt_ref_active = 1; -} - -static void update_golden_frame_stats(AV1_COMP *cpi) { - RATE_CONTROL *const rc = &cpi->rc; -#if CUSTOMIZED_GF - const TWO_PASS *const twopass = &cpi->twopass; - const GF_GROUP *const gf_group = &twopass->gf_group; - const int is_intrnl_arf = - cpi->oxcf.pass == 2 - ? gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE - : cpi->refresh_alt2_ref_frame; -#else - const int is_intnl_arf = cpi->refresh_alt2_ref_frame; -#endif - - // Update the Golden frame usage counts. - // NOTE(weitinglin): If we use show_existing_frame for an OVERLAY frame, - // only the virtual indices for the reference frame will be - // updated and cpi->refresh_golden_frame will still be zero. 
- if (cpi->refresh_golden_frame || rc->is_src_frame_alt_ref) { - // We will not use internal overlay frames to replace the golden frame - if (!rc->is_src_frame_ext_arf) - // this frame refreshes means next frames don't unless specified by user - rc->frames_since_golden = 0; - - // If we are not using alt ref in the up and coming group clear the arf - // active flag. In multi arf group case, if the index is not 0 then - // we are overlaying a mid group arf so should not reset the flag. - if (cpi->oxcf.pass == 2) { - if (!rc->source_alt_ref_pending && (cpi->twopass.gf_group.index == 0)) - rc->source_alt_ref_active = 0; - } else if (!rc->source_alt_ref_pending) { - rc->source_alt_ref_active = 0; - } - } else if (!cpi->refresh_alt_ref_frame && !is_intrnl_arf) { - rc->frames_since_golden++; - } -} - -void av1_rc_postencode_update(AV1_COMP *cpi, uint64_t bytes_used) { - const AV1_COMMON *const cm = &cpi->common; - RATE_CONTROL *const rc = &cpi->rc; -#if CUSTOMIZED_GF - const TWO_PASS *const twopass = &cpi->twopass; - const GF_GROUP *const gf_group = &twopass->gf_group; - const int is_intrnl_arf = - cpi->oxcf.pass == 2 - ? gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE - : cpi->refresh_alt2_ref_frame; -#else - const int is_intrnl_arf = cpi->refresh_alt2_ref_frame; -#endif - - const int qindex = cm->base_qindex; - - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) { - av1_cyclic_refresh_postencode(cpi); - } - - // Update rate control heuristics - rc->projected_frame_size = (int)(bytes_used << 3); - - // Post encode loop adjustment of Q prediction. - av1_rc_update_rate_correction_factors(cpi, cm->width, cm->height); - - // Keep a record of last Q and ambient average Q. 
- if (cm->frame_type == KEY_FRAME) { - rc->last_q[KEY_FRAME] = qindex; - rc->avg_frame_qindex[KEY_FRAME] = - ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[KEY_FRAME] + qindex, 2); - } else { - if (!rc->is_src_frame_alt_ref && - !(cpi->refresh_golden_frame || is_intrnl_arf || - cpi->refresh_alt_ref_frame)) { - rc->last_q[INTER_FRAME] = qindex; - rc->avg_frame_qindex[INTER_FRAME] = - ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[INTER_FRAME] + qindex, 2); - rc->ni_frames++; - rc->tot_q += av1_convert_qindex_to_q(qindex, cm->seq_params.bit_depth); - rc->avg_q = rc->tot_q / rc->ni_frames; - // Calculate the average Q for normal inter frames (not key or GFU - // frames). - rc->ni_tot_qi += qindex; - rc->ni_av_qi = rc->ni_tot_qi / rc->ni_frames; - } - } - - // Keep record of last boosted (KF/GF/ARF) Q value. - // If the current frame is coded at a lower Q then we also update it. - // If all mbs in this group are skipped only update if the Q value is - // better than that already stored. - // This is used to help set quality in forced key frames to reduce popping - if ((qindex < rc->last_boosted_qindex) || (cm->frame_type == KEY_FRAME) || - (!rc->constrained_gf_group && - (cpi->refresh_alt_ref_frame || is_intrnl_arf || - (cpi->refresh_golden_frame && !rc->is_src_frame_alt_ref)))) { - rc->last_boosted_qindex = qindex; - } - if (cm->frame_type == KEY_FRAME) rc->last_kf_qindex = qindex; - - update_buffer_level(cpi, rc->projected_frame_size); - - // Rolling monitors of whether we are over or underspending used to help - // regulate min and Max Q in two pass. 
- if (av1_frame_scaled(cm)) - rc->this_frame_target = - (int)(rc->this_frame_target / - resize_rate_factor(cpi, cm->width, cm->height)); - if (cm->frame_type != KEY_FRAME) { - rc->rolling_target_bits = ROUND_POWER_OF_TWO( - rc->rolling_target_bits * 3 + rc->this_frame_target, 2); - rc->rolling_actual_bits = ROUND_POWER_OF_TWO( - rc->rolling_actual_bits * 3 + rc->projected_frame_size, 2); - rc->long_rolling_target_bits = ROUND_POWER_OF_TWO( - rc->long_rolling_target_bits * 31 + rc->this_frame_target, 5); - rc->long_rolling_actual_bits = ROUND_POWER_OF_TWO( - rc->long_rolling_actual_bits * 31 + rc->projected_frame_size, 5); - } - - // Actual bits spent - rc->total_actual_bits += rc->projected_frame_size; - // TODO(zoeliu): To investigate whether we should treat BWDREF_FRAME - // differently here for rc->avg_frame_bandwidth. - rc->total_target_bits += - (cm->show_frame || rc->is_bwd_ref_frame) ? rc->avg_frame_bandwidth : 0; - - rc->total_target_vs_actual = rc->total_actual_bits - rc->total_target_bits; - - if (is_altref_enabled(cpi) && cpi->refresh_alt_ref_frame && - (cm->frame_type != KEY_FRAME)) - // Update the alternate reference frame stats as appropriate. - update_alt_ref_frame_stats(cpi); - else - // Update the Golden frame stats as appropriate. - update_golden_frame_stats(cpi); - - if (cm->frame_type == KEY_FRAME) rc->frames_since_key = 0; - // if (cm->current_video_frame == 1 && cm->show_frame) - /* - rc->this_frame_target = - (int)(rc->this_frame_target / resize_rate_factor(cpi, cm->width, - cm->height)); - */ -} - -void av1_rc_postencode_update_drop_frame(AV1_COMP *cpi) { - // Update buffer level with zero size, update frame counters, and return. - update_buffer_level(cpi, 0); - cpi->rc.frames_since_key++; - cpi->rc.frames_to_key--; - cpi->rc.rc_2_frame = 0; - cpi->rc.rc_1_frame = 0; -} - -// Use this macro to turn on/off use of alt-refs in one-pass mode. 
-#define USE_ALTREF_FOR_ONE_PASS 1 - -static int calc_pframe_target_size_one_pass_vbr(const AV1_COMP *const cpi) { - static const int af_ratio = 10; - const RATE_CONTROL *const rc = &cpi->rc; - int target; -#if USE_ALTREF_FOR_ONE_PASS - target = - (!rc->is_src_frame_alt_ref && - (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) - ? (rc->avg_frame_bandwidth * rc->baseline_gf_interval * af_ratio) / - (rc->baseline_gf_interval + af_ratio - 1) - : (rc->avg_frame_bandwidth * rc->baseline_gf_interval) / - (rc->baseline_gf_interval + af_ratio - 1); -#else - target = rc->avg_frame_bandwidth; -#endif - return av1_rc_clamp_pframe_target_size(cpi, target); -} - -static int calc_iframe_target_size_one_pass_vbr(const AV1_COMP *const cpi) { - static const int kf_ratio = 25; - const RATE_CONTROL *rc = &cpi->rc; - const int target = rc->avg_frame_bandwidth * kf_ratio; - return av1_rc_clamp_iframe_target_size(cpi, target); -} - -void av1_rc_get_one_pass_vbr_params(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - RATE_CONTROL *const rc = &cpi->rc; - int target; - int altref_enabled = is_altref_enabled(cpi); - int sframe_dist = cpi->oxcf.sframe_dist; - int sframe_mode = cpi->oxcf.sframe_mode; - int sframe_enabled = cpi->oxcf.sframe_enabled; - // TODO(yaowu): replace the "auto_key && 0" below with proper decision logic. - if (!cpi->refresh_alt_ref_frame && - (cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY) || - rc->frames_to_key == 0 || (cpi->oxcf.auto_key && 0))) { - cm->frame_type = KEY_FRAME; - rc->this_key_frame_forced = - cm->current_video_frame != 0 && rc->frames_to_key == 0; - rc->frames_to_key = cpi->oxcf.key_freq; - rc->kf_boost = DEFAULT_KF_BOOST; - rc->source_alt_ref_active = 0; - } else { - cm->frame_type = INTER_FRAME; - if (sframe_enabled) { - if (altref_enabled) { - if (sframe_mode == 1) { - // sframe_mode == 1: insert sframe if it matches altref frame. 
- - if (cm->current_video_frame % sframe_dist == 0 && - cm->frame_type != KEY_FRAME && cm->current_video_frame != 0 && - cpi->refresh_alt_ref_frame) { - cm->frame_type = S_FRAME; - } - } else { - // sframe_mode != 1: if sframe will be inserted at the next available - // altref frame - - if (cm->current_video_frame % sframe_dist == 0 && - cm->frame_type != KEY_FRAME && cm->current_video_frame != 0) { - rc->sframe_due = 1; - } - - if (rc->sframe_due && cpi->refresh_alt_ref_frame) { - cm->frame_type = S_FRAME; - rc->sframe_due = 0; - } - } - } else { - if (cm->current_video_frame % sframe_dist == 0 && - cm->frame_type != KEY_FRAME && cm->current_video_frame != 0) { - cm->frame_type = S_FRAME; - } - } - } - } - if (rc->frames_till_gf_update_due == 0) { - rc->baseline_gf_interval = (rc->min_gf_interval + rc->max_gf_interval) / 2; - rc->frames_till_gf_update_due = rc->baseline_gf_interval; - // NOTE: frames_till_gf_update_due must be <= frames_to_key. - if (rc->frames_till_gf_update_due > rc->frames_to_key) { - rc->frames_till_gf_update_due = rc->frames_to_key; - rc->constrained_gf_group = 1; - } else { - rc->constrained_gf_group = 0; - } - cpi->refresh_golden_frame = 1; - rc->source_alt_ref_pending = USE_ALTREF_FOR_ONE_PASS; - rc->gfu_boost = DEFAULT_GF_BOOST; - } - - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) - av1_cyclic_refresh_update_parameters(cpi); - - if (cm->frame_type == KEY_FRAME) - target = calc_iframe_target_size_one_pass_vbr(cpi); - else - target = calc_pframe_target_size_one_pass_vbr(cpi); - rc_set_frame_target(cpi, target, cm->width, cm->height); -} - -static int calc_pframe_target_size_one_pass_cbr(const AV1_COMP *cpi) { - const AV1EncoderConfig *oxcf = &cpi->oxcf; - const RATE_CONTROL *rc = &cpi->rc; - const int64_t diff = rc->optimal_buffer_level - rc->buffer_level; - const int64_t one_pct_bits = 1 + rc->optimal_buffer_level / 100; - int min_frame_target = - AOMMAX(rc->avg_frame_bandwidth >> 4, FRAME_OVERHEAD_BITS); - int target; - - if 
(oxcf->gf_cbr_boost_pct) { - const int af_ratio_pct = oxcf->gf_cbr_boost_pct + 100; - target = cpi->refresh_golden_frame - ? (rc->avg_frame_bandwidth * rc->baseline_gf_interval * - af_ratio_pct) / - (rc->baseline_gf_interval * 100 + af_ratio_pct - 100) - : (rc->avg_frame_bandwidth * rc->baseline_gf_interval * 100) / - (rc->baseline_gf_interval * 100 + af_ratio_pct - 100); - } else { - target = rc->avg_frame_bandwidth; - } - - if (diff > 0) { - // Lower the target bandwidth for this frame. - const int pct_low = (int)AOMMIN(diff / one_pct_bits, oxcf->under_shoot_pct); - target -= (target * pct_low) / 200; - } else if (diff < 0) { - // Increase the target bandwidth for this frame. - const int pct_high = - (int)AOMMIN(-diff / one_pct_bits, oxcf->over_shoot_pct); - target += (target * pct_high) / 200; - } - if (oxcf->rc_max_inter_bitrate_pct) { - const int max_rate = - rc->avg_frame_bandwidth * oxcf->rc_max_inter_bitrate_pct / 100; - target = AOMMIN(target, max_rate); - } - return AOMMAX(min_frame_target, target); -} - -static int calc_iframe_target_size_one_pass_cbr(const AV1_COMP *cpi) { - const RATE_CONTROL *rc = &cpi->rc; - int target; - if (cpi->common.current_video_frame == 0) { - target = ((rc->starting_buffer_level / 2) > INT_MAX) - ? INT_MAX - : (int)(rc->starting_buffer_level / 2); - } else { - int kf_boost = 32; - double framerate = cpi->framerate; - - kf_boost = AOMMAX(kf_boost, (int)(2 * framerate - 16)); - if (rc->frames_since_key < framerate / 2) { - kf_boost = (int)(kf_boost * rc->frames_since_key / (framerate / 2)); - } - target = ((16 + kf_boost) * rc->avg_frame_bandwidth) >> 4; - } - return av1_rc_clamp_iframe_target_size(cpi, target); -} - -void av1_rc_get_one_pass_cbr_params(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - RATE_CONTROL *const rc = &cpi->rc; - int target; - // TODO(yaowu): replace the "auto_key && 0" below with proper decision logic. 
- if ((cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY) || - rc->frames_to_key == 0 || (cpi->oxcf.auto_key && 0))) { - cm->frame_type = KEY_FRAME; - rc->this_key_frame_forced = - cm->current_video_frame != 0 && rc->frames_to_key == 0; - rc->frames_to_key = cpi->oxcf.key_freq; - rc->kf_boost = DEFAULT_KF_BOOST; - rc->source_alt_ref_active = 0; - } else { - cm->frame_type = INTER_FRAME; - } - if (rc->frames_till_gf_update_due == 0) { - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) - av1_cyclic_refresh_set_golden_update(cpi); - else - rc->baseline_gf_interval = - (rc->min_gf_interval + rc->max_gf_interval) / 2; - rc->frames_till_gf_update_due = rc->baseline_gf_interval; - // NOTE: frames_till_gf_update_due must be <= frames_to_key. - if (rc->frames_till_gf_update_due > rc->frames_to_key) - rc->frames_till_gf_update_due = rc->frames_to_key; - cpi->refresh_golden_frame = 1; - rc->gfu_boost = DEFAULT_GF_BOOST; - } - - // Any update/change of global cyclic refresh parameters (amount/delta-qp) - // should be done here, before the frame qp is selected. - if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) - av1_cyclic_refresh_update_parameters(cpi); - - if (cm->frame_type == KEY_FRAME) - target = calc_iframe_target_size_one_pass_cbr(cpi); - else - target = calc_pframe_target_size_one_pass_cbr(cpi); - - rc_set_frame_target(cpi, target, cm->width, cm->height); - // TODO(afergs): Decide whether to scale up, down, or not at all -} - -int av1_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget, - aom_bit_depth_t bit_depth) { - int start_index = rc->worst_quality; - int target_index = rc->worst_quality; - int i; - - // Convert the average q value to an index. 
- for (i = rc->best_quality; i < rc->worst_quality; ++i) { - start_index = i; - if (av1_convert_qindex_to_q(i, bit_depth) >= qstart) break; - } - - // Convert the q target to an index - for (i = rc->best_quality; i < rc->worst_quality; ++i) { - target_index = i; - if (av1_convert_qindex_to_q(i, bit_depth) >= qtarget) break; - } - - return target_index - start_index; -} - -int av1_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type, - int qindex, double rate_target_ratio, - aom_bit_depth_t bit_depth) { - int target_index = rc->worst_quality; - int i; - - // Look up the current projected bits per block for the base index - const int base_bits_per_mb = - av1_rc_bits_per_mb(frame_type, qindex, 1.0, bit_depth); - - // Find the target bits per mb based on the base value and given ratio. - const int target_bits_per_mb = (int)(rate_target_ratio * base_bits_per_mb); - - // Convert the q target to an index - for (i = rc->best_quality; i < rc->worst_quality; ++i) { - if (av1_rc_bits_per_mb(frame_type, i, 1.0, bit_depth) <= - target_bits_per_mb) { - target_index = i; - break; - } - } - return target_index - qindex; -} - -void av1_rc_set_gf_interval_range(const AV1_COMP *const cpi, - RATE_CONTROL *const rc) { - const AV1EncoderConfig *const oxcf = &cpi->oxcf; - - // Special case code for 1 pass fixed Q mode tests - if ((oxcf->pass == 0) && (oxcf->rc_mode == AOM_Q)) { - rc->max_gf_interval = FIXED_GF_INTERVAL; - rc->min_gf_interval = FIXED_GF_INTERVAL; - rc->static_scene_max_gf_interval = FIXED_GF_INTERVAL; - } else { - // Set Maximum gf/arf interval - rc->max_gf_interval = oxcf->max_gf_interval; - rc->min_gf_interval = oxcf->min_gf_interval; - if (rc->min_gf_interval == 0) - rc->min_gf_interval = av1_rc_get_default_min_gf_interval( - oxcf->width, oxcf->height, cpi->framerate); - if (rc->max_gf_interval == 0) - rc->max_gf_interval = av1_rc_get_default_max_gf_interval( - cpi->framerate, rc->min_gf_interval); - - // Extended interval for genuinely static scenes - 
rc->static_scene_max_gf_interval = MAX_LAG_BUFFERS * 2; - - if (is_altref_enabled(cpi)) { - if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1) - rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1; - } - - if (rc->max_gf_interval > rc->static_scene_max_gf_interval) - rc->max_gf_interval = rc->static_scene_max_gf_interval; - - // Clamp min to max - rc->min_gf_interval = AOMMIN(rc->min_gf_interval, rc->max_gf_interval); - } -} - -void av1_rc_update_framerate(AV1_COMP *cpi, int width, int height) { - const AV1EncoderConfig *const oxcf = &cpi->oxcf; - RATE_CONTROL *const rc = &cpi->rc; - int vbr_max_bits; - const int MBs = av1_get_MBs(width, height); - - rc->avg_frame_bandwidth = (int)(oxcf->target_bandwidth / cpi->framerate); - rc->min_frame_bandwidth = - (int)(rc->avg_frame_bandwidth * oxcf->two_pass_vbrmin_section / 100); - - rc->min_frame_bandwidth = - AOMMAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS); - - // A maximum bitrate for a frame is defined. - // The baseline for this aligns with HW implementations that - // can support decode of 1080P content up to a bitrate of MAX_MB_RATE bits - // per 16x16 MB (averaged over a frame). However this limit is extended if - // a very high rate is given on the command line or the the rate cannnot - // be acheived because of a user specificed max q (e.g. when the user - // specifies lossless encode. 
- vbr_max_bits = - (int)(((int64_t)rc->avg_frame_bandwidth * oxcf->two_pass_vbrmax_section) / - 100); - rc->max_frame_bandwidth = - AOMMAX(AOMMAX((MBs * MAX_MB_RATE), MAXRATE_1080P), vbr_max_bits); - - av1_rc_set_gf_interval_range(cpi, rc); -} - -#define VBR_PCT_ADJUSTMENT_LIMIT 50 -// For VBR...adjustment to the frame target based on error from previous frames -static void vbr_rate_correction(AV1_COMP *cpi, int *this_frame_target) { - RATE_CONTROL *const rc = &cpi->rc; - int64_t vbr_bits_off_target = rc->vbr_bits_off_target; - int max_delta; - double position_factor = 1.0; - - // How far through the clip are we. - // This number is used to damp the per frame rate correction. - // Range 0 - 1.0 - if (cpi->twopass.total_stats.count != 0.) { - position_factor = sqrt((double)cpi->common.current_video_frame / - cpi->twopass.total_stats.count); - } - max_delta = (int)(position_factor * - ((*this_frame_target * VBR_PCT_ADJUSTMENT_LIMIT) / 100)); - - // vbr_bits_off_target > 0 means we have extra bits to spend - if (vbr_bits_off_target > 0) { - *this_frame_target += (vbr_bits_off_target > max_delta) - ? max_delta - : (int)vbr_bits_off_target; - } else { - *this_frame_target -= (vbr_bits_off_target < -max_delta) - ? max_delta - : (int)-vbr_bits_off_target; - } - - // Fast redistribution of bits arising from massive local undershoot. - // Dont do it for kf,arf,gf or overlay frames. 
- if (!frame_is_kf_gf_arf(cpi) && !rc->is_src_frame_alt_ref && - rc->vbr_bits_off_target_fast) { - int one_frame_bits = AOMMAX(rc->avg_frame_bandwidth, *this_frame_target); - int fast_extra_bits; - fast_extra_bits = (int)AOMMIN(rc->vbr_bits_off_target_fast, one_frame_bits); - fast_extra_bits = (int)AOMMIN( - fast_extra_bits, - AOMMAX(one_frame_bits / 8, rc->vbr_bits_off_target_fast / 8)); - *this_frame_target += (int)fast_extra_bits; - rc->vbr_bits_off_target_fast -= fast_extra_bits; - } -} - -void av1_set_target_rate(AV1_COMP *cpi, int width, int height) { - RATE_CONTROL *const rc = &cpi->rc; - int target_rate = rc->base_frame_target; - - // Correction to rate target based on prior over or under shoot. - if (cpi->oxcf.rc_mode == AOM_VBR || cpi->oxcf.rc_mode == AOM_CQ) - vbr_rate_correction(cpi, &target_rate); - rc_set_frame_target(cpi, target_rate, width, height); -} diff --git a/third_party/aom/av1/encoder/ratectrl.h b/third_party/aom/av1/encoder/ratectrl.h deleted file mode 100644 index 198ecab97..000000000 --- a/third_party/aom/av1/encoder/ratectrl.h +++ /dev/null @@ -1,295 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_ENCODER_RATECTRL_H_ -#define AOM_AV1_ENCODER_RATECTRL_H_ - -#include "aom/aom_codec.h" -#include "aom/aom_integer.h" - -#include "av1/common/blockd.h" - -#ifdef __cplusplus -extern "C" { -#endif - -// Bits Per MB at different Q (Multiplied by 512) -#define BPER_MB_NORMBITS 9 - -#define CUSTOMIZED_GF 1 - -#if CONFIG_FIX_GF_LENGTH -#define FIXED_GF_LENGTH 16 -#define MAX_PYRAMID_LVL 4 -// We allow a frame to have at most two left/right descendants before changing -// them into to a subtree, i.e., we allow the following structure: -/* OUT_OF_ORDER_FRAME - / / \ \ -(two left children) F F F F (two right children) */ -// Therefore the max gf size supported by 4 layer structure is -// 1 (KEY/OVERLAY) + 1 + 2 + 4 + 16 (two children on both side of their parent) -#define MAX_PYRAMID_SIZE 24 -#define USE_SYMM_MULTI_LAYER 1 -#define REDUCE_LAST_ALT_BOOST 1 -#define REDUCE_LAST_GF_LENGTH 1 -#define MULTI_LVL_BOOST_VBR_CQ 1 -#else -#define USE_SYMM_MULTI_LAYER 0 -#define REDUCE_LAST_ALT_BOOST 0 -#define REDUCE_LAST_GF_LENGTH 0 -#define MULTI_LVL_BOOST_VBR_CQ 0 -#endif - -#if USE_SYMM_MULTI_LAYER -#define USE_MANUAL_GF4_STRUCT 0 -#endif - -#define MIN_GF_INTERVAL 4 -#define MAX_GF_INTERVAL 16 -#define FIXED_GF_INTERVAL 8 // Used in some testing modes only - -typedef enum { - INTER_NORMAL = 0, - INTER_LOW = 1, - INTER_HIGH = 2, - GF_ARF_LOW = 3, - GF_ARF_STD = 4, - KF_STD = 5, - RATE_FACTOR_LEVELS = 6 -} RATE_FACTOR_LEVEL; - -static const double rate_factor_deltas[RATE_FACTOR_LEVELS] = { - 1.00, // INTER_NORMAL - 0.80, // INTER_LOW - 1.50, // INTER_HIGH - 1.25, // GF_ARF_LOW - 2.00, // GF_ARF_STD - 2.00, // KF_STD -}; - -typedef struct { - int resize_width; - int resize_height; - uint8_t superres_denom; -} size_params_type; - -typedef struct { - // Rate targetting variables - int base_frame_target; // A baseline frame target before adjustment - // for previous under or over shoot. - int this_frame_target; // Actual frame target after rc adjustment. 
- int projected_frame_size; - int sb64_target_rate; - int last_q[FRAME_TYPES]; // Separate values for Intra/Inter - int last_boosted_qindex; // Last boosted GF/KF/ARF q - int last_kf_qindex; // Q index of the last key frame coded. - - int gfu_boost; - int last_boost; - int kf_boost; - - double rate_correction_factors[RATE_FACTOR_LEVELS]; - - int frames_since_golden; - int frames_till_gf_update_due; - int min_gf_interval; - int max_gf_interval; - int static_scene_max_gf_interval; - int baseline_gf_interval; - int constrained_gf_group; - int frames_to_key; - int frames_since_key; - int this_key_frame_forced; - int next_key_frame_forced; - int source_alt_ref_pending; - int source_alt_ref_active; - int is_src_frame_alt_ref; - int sframe_due; - - // Length of the bi-predictive frame group interval - int bipred_group_interval; - - // NOTE: Different types of frames may have different bits allocated - // accordingly, aiming to achieve the overall optimal RD performance. - int is_bwd_ref_frame; - int is_last_bipred_frame; - int is_bipred_frame; - int is_src_frame_ext_arf; - - int avg_frame_bandwidth; // Average frame size target for clip - int min_frame_bandwidth; // Minimum allocation used for any frame - int max_frame_bandwidth; // Maximum burst rate allowed for a frame. 
- - int ni_av_qi; - int ni_tot_qi; - int ni_frames; - int avg_frame_qindex[FRAME_TYPES]; - double tot_q; - double avg_q; - - int64_t buffer_level; - int64_t bits_off_target; - int64_t vbr_bits_off_target; - int64_t vbr_bits_off_target_fast; - - int decimation_factor; - int decimation_count; - - int rolling_target_bits; - int rolling_actual_bits; - - int long_rolling_target_bits; - int long_rolling_actual_bits; - - int rate_error_estimate; - - int64_t total_actual_bits; - int64_t total_target_bits; - int64_t total_target_vs_actual; - - int worst_quality; - int best_quality; - - int64_t starting_buffer_level; - int64_t optimal_buffer_level; - int64_t maximum_buffer_size; - - // rate control history for last frame(1) and the frame before(2). - // -1: undershot - // 1: overshoot - // 0: not initialized. - int rc_1_frame; - int rc_2_frame; - int q_1_frame; - int q_2_frame; - - // Auto frame-scaling variables. - int rf_level_maxq[RATE_FACTOR_LEVELS]; - float_t arf_boost_factor; - // Q index used for ALT frame - int arf_q; -} RATE_CONTROL; - -struct AV1_COMP; -struct AV1EncoderConfig; - -void av1_rc_init(const struct AV1EncoderConfig *oxcf, int pass, - RATE_CONTROL *rc); - -int av1_estimate_bits_at_q(FRAME_TYPE frame_kind, int q, int mbs, - double correction_factor, aom_bit_depth_t bit_depth); - -double av1_convert_qindex_to_q(int qindex, aom_bit_depth_t bit_depth); - -void av1_rc_init_minq_luts(void); - -int av1_rc_get_default_min_gf_interval(int width, int height, double framerate); -// Note av1_rc_get_default_max_gf_interval() requires the min_gf_interval to -// be passed in to ensure that the max_gf_interval returned is at least as bis -// as that. 
-int av1_rc_get_default_max_gf_interval(double framerate, int min_frame_rate); - -// Generally at the high level, the following flow is expected -// to be enforced for rate control: -// First call per frame, one of: -// av1_rc_get_one_pass_vbr_params() -// av1_rc_get_one_pass_cbr_params() -// av1_rc_get_first_pass_params() -// av1_rc_get_second_pass_params() -// depending on the usage to set the rate control encode parameters desired. -// -// Then, call encode_frame_to_data_rate() to perform the -// actual encode. This function will in turn call encode_frame() -// one or more times, followed by one of: -// av1_rc_postencode_update() -// av1_rc_postencode_update_drop_frame() -// -// The majority of rate control parameters are only expected -// to be set in the av1_rc_get_..._params() functions and -// updated during the av1_rc_postencode_update...() functions. -// The only exceptions are av1_rc_drop_frame() and -// av1_rc_update_rate_correction_factors() functions. - -// Functions to set parameters for encoding before the actual -// encode_frame_to_data_rate() function. -void av1_rc_get_one_pass_vbr_params(struct AV1_COMP *cpi); -void av1_rc_get_one_pass_cbr_params(struct AV1_COMP *cpi); - -// Post encode update of the rate control parameters based -// on bytes used -void av1_rc_postencode_update(struct AV1_COMP *cpi, uint64_t bytes_used); -// Post encode update of the rate control parameters for dropped frames -void av1_rc_postencode_update_drop_frame(struct AV1_COMP *cpi); - -// Updates rate correction factors -// Changes only the rate correction factors in the rate control structure. -void av1_rc_update_rate_correction_factors(struct AV1_COMP *cpi, int width, - int height); - -// Decide if we should drop this frame: For 1-pass CBR. -// Changes only the decimation count in the rate control structure -int av1_rc_drop_frame(struct AV1_COMP *cpi); - -// Computes frame size bounds. 
-void av1_rc_compute_frame_size_bounds(const struct AV1_COMP *cpi, - int this_frame_target, - int *frame_under_shoot_limit, - int *frame_over_shoot_limit); - -// Picks q and q bounds given the target for bits -int av1_rc_pick_q_and_bounds(struct AV1_COMP *cpi, int width, int height, - int *bottom_index, int *top_index); - -// Estimates q to achieve a target bits per frame -int av1_rc_regulate_q(const struct AV1_COMP *cpi, int target_bits_per_frame, - int active_best_quality, int active_worst_quality, - int width, int height); - -// Estimates bits per mb for a given qindex and correction factor. -int av1_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex, - double correction_factor, aom_bit_depth_t bit_depth); - -// Clamping utilities for bitrate targets for iframes and pframes. -int av1_rc_clamp_iframe_target_size(const struct AV1_COMP *const cpi, - int target); -int av1_rc_clamp_pframe_target_size(const struct AV1_COMP *const cpi, - int target); -// Utility to set frame_target into the RATE_CONTROL structure -// This function is called only from the av1_rc_get_..._params() functions. -void av1_rc_set_frame_target(struct AV1_COMP *cpi, int target); - -// Computes a q delta (in "q index" terms) to get from a starting q value -// to a target q value -int av1_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget, - aom_bit_depth_t bit_depth); - -// Computes a q delta (in "q index" terms) to get from a starting q value -// to a value that should equate to the given rate ratio. 
-int av1_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type, - int qindex, double rate_target_ratio, - aom_bit_depth_t bit_depth); - -int av1_frame_type_qdelta(const struct AV1_COMP *cpi, int rf_level, int q); - -void av1_rc_update_framerate(struct AV1_COMP *cpi, int width, int height); - -void av1_rc_set_gf_interval_range(const struct AV1_COMP *const cpi, - RATE_CONTROL *const rc); - -void av1_set_target_rate(struct AV1_COMP *cpi, int width, int height); - -int av1_resize_one_pass_cbr(struct AV1_COMP *cpi); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_RATECTRL_H_ diff --git a/third_party/aom/av1/encoder/rd.c b/third_party/aom/av1/encoder/rd.c deleted file mode 100644 index b87d89e50..000000000 --- a/third_party/aom/av1/encoder/rd.c +++ /dev/null @@ -1,1512 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include -#include -#include - -#include "config/av1_rtcd.h" - -#include "aom_dsp/aom_dsp_common.h" -#include "aom_mem/aom_mem.h" -#include "aom_ports/bitops.h" -#include "aom_ports/mem.h" -#include "aom_ports/system_state.h" - -#include "av1/common/common.h" -#include "av1/common/entropy.h" -#include "av1/common/entropymode.h" -#include "av1/common/mvref_common.h" -#include "av1/common/pred_common.h" -#include "av1/common/quant_common.h" -#include "av1/common/reconinter.h" -#include "av1/common/reconintra.h" -#include "av1/common/seg_common.h" - -#include "av1/encoder/av1_quantize.h" -#include "av1/encoder/cost.h" -#include "av1/encoder/encodemb.h" -#include "av1/encoder/encodemv.h" -#include "av1/encoder/encoder.h" -#include "av1/encoder/encodetxb.h" -#include "av1/encoder/mcomp.h" -#include "av1/encoder/ratectrl.h" -#include "av1/encoder/rd.h" -#include "av1/encoder/tokenize.h" - -#define RD_THRESH_POW 1.25 - -// The baseline rd thresholds for breaking out of the rd loop for -// certain modes are assumed to be based on 8x8 blocks. -// This table is used to correct for block size. -// The factors here are << 2 (2 = x0.5, 32 = x8 etc). 
-static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES_ALL] = { - 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32, 48, 48, 64, 4, 4, 8, 8, 16, 16 -}; - -static const int use_intra_ext_tx_for_txsize[EXT_TX_SETS_INTRA][EXT_TX_SIZES] = - { - { 1, 1, 1, 1 }, // unused - { 1, 1, 0, 0 }, - { 0, 0, 1, 0 }, - }; - -static const int use_inter_ext_tx_for_txsize[EXT_TX_SETS_INTER][EXT_TX_SIZES] = - { - { 1, 1, 1, 1 }, // unused - { 1, 1, 0, 0 }, - { 0, 0, 1, 0 }, - { 0, 0, 0, 1 }, - }; - -static const int av1_ext_tx_set_idx_to_type[2][AOMMAX(EXT_TX_SETS_INTRA, - EXT_TX_SETS_INTER)] = { - { - // Intra - EXT_TX_SET_DCTONLY, - EXT_TX_SET_DTT4_IDTX_1DDCT, - EXT_TX_SET_DTT4_IDTX, - }, - { - // Inter - EXT_TX_SET_DCTONLY, - EXT_TX_SET_ALL16, - EXT_TX_SET_DTT9_IDTX_1DDCT, - EXT_TX_SET_DCT_IDTX, - }, -}; - -void av1_fill_mode_rates(AV1_COMMON *const cm, MACROBLOCK *x, - FRAME_CONTEXT *fc) { - int i, j; - - for (i = 0; i < PARTITION_CONTEXTS; ++i) - av1_cost_tokens_from_cdf(x->partition_cost[i], fc->partition_cdf[i], NULL); - - if (cm->skip_mode_flag) { - for (i = 0; i < SKIP_CONTEXTS; ++i) { - av1_cost_tokens_from_cdf(x->skip_mode_cost[i], fc->skip_mode_cdfs[i], - NULL); - } - } - - for (i = 0; i < SKIP_CONTEXTS; ++i) { - av1_cost_tokens_from_cdf(x->skip_cost[i], fc->skip_cdfs[i], NULL); - } - - for (i = 0; i < KF_MODE_CONTEXTS; ++i) - for (j = 0; j < KF_MODE_CONTEXTS; ++j) - av1_cost_tokens_from_cdf(x->y_mode_costs[i][j], fc->kf_y_cdf[i][j], NULL); - - for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) - av1_cost_tokens_from_cdf(x->mbmode_cost[i], fc->y_mode_cdf[i], NULL); - for (i = 0; i < CFL_ALLOWED_TYPES; ++i) - for (j = 0; j < INTRA_MODES; ++j) - av1_cost_tokens_from_cdf(x->intra_uv_mode_cost[i][j], - fc->uv_mode_cdf[i][j], NULL); - - av1_cost_tokens_from_cdf(x->filter_intra_mode_cost, fc->filter_intra_mode_cdf, - NULL); - for (i = 0; i < BLOCK_SIZES_ALL; ++i) { - if (av1_filter_intra_allowed_bsize(cm, i)) - av1_cost_tokens_from_cdf(x->filter_intra_cost[i], - fc->filter_intra_cdfs[i], 
NULL); - } - - for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) - av1_cost_tokens_from_cdf(x->switchable_interp_costs[i], - fc->switchable_interp_cdf[i], NULL); - - for (i = 0; i < PALATTE_BSIZE_CTXS; ++i) { - av1_cost_tokens_from_cdf(x->palette_y_size_cost[i], - fc->palette_y_size_cdf[i], NULL); - av1_cost_tokens_from_cdf(x->palette_uv_size_cost[i], - fc->palette_uv_size_cdf[i], NULL); - for (j = 0; j < PALETTE_Y_MODE_CONTEXTS; ++j) { - av1_cost_tokens_from_cdf(x->palette_y_mode_cost[i][j], - fc->palette_y_mode_cdf[i][j], NULL); - } - } - - for (i = 0; i < PALETTE_UV_MODE_CONTEXTS; ++i) { - av1_cost_tokens_from_cdf(x->palette_uv_mode_cost[i], - fc->palette_uv_mode_cdf[i], NULL); - } - - for (i = 0; i < PALETTE_SIZES; ++i) { - for (j = 0; j < PALETTE_COLOR_INDEX_CONTEXTS; ++j) { - av1_cost_tokens_from_cdf(x->palette_y_color_cost[i][j], - fc->palette_y_color_index_cdf[i][j], NULL); - av1_cost_tokens_from_cdf(x->palette_uv_color_cost[i][j], - fc->palette_uv_color_index_cdf[i][j], NULL); - } - } - - int sign_cost[CFL_JOINT_SIGNS]; - av1_cost_tokens_from_cdf(sign_cost, fc->cfl_sign_cdf, NULL); - for (int joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) { - int *cost_u = x->cfl_cost[joint_sign][CFL_PRED_U]; - int *cost_v = x->cfl_cost[joint_sign][CFL_PRED_V]; - if (CFL_SIGN_U(joint_sign) == CFL_SIGN_ZERO) { - memset(cost_u, 0, CFL_ALPHABET_SIZE * sizeof(*cost_u)); - } else { - const aom_cdf_prob *cdf_u = fc->cfl_alpha_cdf[CFL_CONTEXT_U(joint_sign)]; - av1_cost_tokens_from_cdf(cost_u, cdf_u, NULL); - } - if (CFL_SIGN_V(joint_sign) == CFL_SIGN_ZERO) { - memset(cost_v, 0, CFL_ALPHABET_SIZE * sizeof(*cost_v)); - } else { - const aom_cdf_prob *cdf_v = fc->cfl_alpha_cdf[CFL_CONTEXT_V(joint_sign)]; - av1_cost_tokens_from_cdf(cost_v, cdf_v, NULL); - } - for (int u = 0; u < CFL_ALPHABET_SIZE; u++) - cost_u[u] += sign_cost[joint_sign]; - } - - for (i = 0; i < MAX_TX_CATS; ++i) - for (j = 0; j < TX_SIZE_CONTEXTS; ++j) - av1_cost_tokens_from_cdf(x->tx_size_cost[i][j], 
fc->tx_size_cdf[i][j], - NULL); - - for (i = 0; i < TXFM_PARTITION_CONTEXTS; ++i) { - av1_cost_tokens_from_cdf(x->txfm_partition_cost[i], - fc->txfm_partition_cdf[i], NULL); - } - - for (i = TX_4X4; i < EXT_TX_SIZES; ++i) { - int s; - for (s = 1; s < EXT_TX_SETS_INTER; ++s) { - if (use_inter_ext_tx_for_txsize[s][i]) { - av1_cost_tokens_from_cdf( - x->inter_tx_type_costs[s][i], fc->inter_ext_tx_cdf[s][i], - av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[1][s]]); - } - } - for (s = 1; s < EXT_TX_SETS_INTRA; ++s) { - if (use_intra_ext_tx_for_txsize[s][i]) { - for (j = 0; j < INTRA_MODES; ++j) { - av1_cost_tokens_from_cdf( - x->intra_tx_type_costs[s][i][j], fc->intra_ext_tx_cdf[s][i][j], - av1_ext_tx_inv[av1_ext_tx_set_idx_to_type[0][s]]); - } - } - } - } - for (i = 0; i < DIRECTIONAL_MODES; ++i) { - av1_cost_tokens_from_cdf(x->angle_delta_cost[i], fc->angle_delta_cdf[i], - NULL); - } - av1_cost_tokens_from_cdf(x->switchable_restore_cost, - fc->switchable_restore_cdf, NULL); - av1_cost_tokens_from_cdf(x->wiener_restore_cost, fc->wiener_restore_cdf, - NULL); - av1_cost_tokens_from_cdf(x->sgrproj_restore_cost, fc->sgrproj_restore_cdf, - NULL); - av1_cost_tokens_from_cdf(x->intrabc_cost, fc->intrabc_cdf, NULL); - - if (!frame_is_intra_only(cm)) { - for (i = 0; i < COMP_INTER_CONTEXTS; ++i) { - av1_cost_tokens_from_cdf(x->comp_inter_cost[i], fc->comp_inter_cdf[i], - NULL); - } - - for (i = 0; i < REF_CONTEXTS; ++i) { - for (j = 0; j < SINGLE_REFS - 1; ++j) { - av1_cost_tokens_from_cdf(x->single_ref_cost[i][j], - fc->single_ref_cdf[i][j], NULL); - } - } - - for (i = 0; i < COMP_REF_TYPE_CONTEXTS; ++i) { - av1_cost_tokens_from_cdf(x->comp_ref_type_cost[i], - fc->comp_ref_type_cdf[i], NULL); - } - - for (i = 0; i < UNI_COMP_REF_CONTEXTS; ++i) { - for (j = 0; j < UNIDIR_COMP_REFS - 1; ++j) { - av1_cost_tokens_from_cdf(x->uni_comp_ref_cost[i][j], - fc->uni_comp_ref_cdf[i][j], NULL); - } - } - - for (i = 0; i < REF_CONTEXTS; ++i) { - for (j = 0; j < FWD_REFS - 1; ++j) { - 
av1_cost_tokens_from_cdf(x->comp_ref_cost[i][j], fc->comp_ref_cdf[i][j], - NULL); - } - } - - for (i = 0; i < REF_CONTEXTS; ++i) { - for (j = 0; j < BWD_REFS - 1; ++j) { - av1_cost_tokens_from_cdf(x->comp_bwdref_cost[i][j], - fc->comp_bwdref_cdf[i][j], NULL); - } - } - - for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) { - av1_cost_tokens_from_cdf(x->intra_inter_cost[i], fc->intra_inter_cdf[i], - NULL); - } - - for (i = 0; i < NEWMV_MODE_CONTEXTS; ++i) { - av1_cost_tokens_from_cdf(x->newmv_mode_cost[i], fc->newmv_cdf[i], NULL); - } - - for (i = 0; i < GLOBALMV_MODE_CONTEXTS; ++i) { - av1_cost_tokens_from_cdf(x->zeromv_mode_cost[i], fc->zeromv_cdf[i], NULL); - } - - for (i = 0; i < REFMV_MODE_CONTEXTS; ++i) { - av1_cost_tokens_from_cdf(x->refmv_mode_cost[i], fc->refmv_cdf[i], NULL); - } - - for (i = 0; i < DRL_MODE_CONTEXTS; ++i) { - av1_cost_tokens_from_cdf(x->drl_mode_cost0[i], fc->drl_cdf[i], NULL); - } - for (i = 0; i < INTER_MODE_CONTEXTS; ++i) - av1_cost_tokens_from_cdf(x->inter_compound_mode_cost[i], - fc->inter_compound_mode_cdf[i], NULL); - for (i = 0; i < BLOCK_SIZES_ALL; ++i) - av1_cost_tokens_from_cdf(x->compound_type_cost[i], - fc->compound_type_cdf[i], NULL); - for (i = 0; i < BLOCK_SIZES_ALL; ++i) { - if (get_interinter_wedge_bits(i)) { - av1_cost_tokens_from_cdf(x->wedge_idx_cost[i], fc->wedge_idx_cdf[i], - NULL); - } - } - for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) { - av1_cost_tokens_from_cdf(x->interintra_cost[i], fc->interintra_cdf[i], - NULL); - av1_cost_tokens_from_cdf(x->interintra_mode_cost[i], - fc->interintra_mode_cdf[i], NULL); - } - for (i = 0; i < BLOCK_SIZES_ALL; ++i) { - av1_cost_tokens_from_cdf(x->wedge_interintra_cost[i], - fc->wedge_interintra_cdf[i], NULL); - } - for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) { - av1_cost_tokens_from_cdf(x->motion_mode_cost[i], fc->motion_mode_cdf[i], - NULL); - } - for (i = BLOCK_8X8; i < BLOCK_SIZES_ALL; i++) { - av1_cost_tokens_from_cdf(x->motion_mode_cost1[i], fc->obmc_cdf[i], NULL); - } - for (i = 0; i 
< COMP_INDEX_CONTEXTS; ++i) { - av1_cost_tokens_from_cdf(x->comp_idx_cost[i], fc->compound_index_cdf[i], - NULL); - } - for (i = 0; i < COMP_GROUP_IDX_CONTEXTS; ++i) { - av1_cost_tokens_from_cdf(x->comp_group_idx_cost[i], - fc->comp_group_idx_cdf[i], NULL); - } - } -} - -// Values are now correlated to quantizer. -static int sad_per_bit16lut_8[QINDEX_RANGE]; -static int sad_per_bit4lut_8[QINDEX_RANGE]; -static int sad_per_bit16lut_10[QINDEX_RANGE]; -static int sad_per_bit4lut_10[QINDEX_RANGE]; -static int sad_per_bit16lut_12[QINDEX_RANGE]; -static int sad_per_bit4lut_12[QINDEX_RANGE]; - -static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range, - aom_bit_depth_t bit_depth) { - int i; - // Initialize the sad lut tables using a formulaic calculation for now. - // This is to make it easier to resolve the impact of experimental changes - // to the quantizer tables. - for (i = 0; i < range; i++) { - const double q = av1_convert_qindex_to_q(i, bit_depth); - bit16lut[i] = (int)(0.0418 * q + 2.4107); - bit4lut[i] = (int)(0.063 * q + 2.742); - } -} - -void av1_init_me_luts(void) { - init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE, - AOM_BITS_8); - init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE, - AOM_BITS_10); - init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE, - AOM_BITS_12); -} - -static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12, - 8, 8, 4, 4, 2, 2, 1, 0 }; -static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { - 128, 144, 128, 128, 144, - // TODO(zoeliu): To adjust further following factor values. 
- 128, 128, 128, - // TODO(weitinglin): We should investigate if the values should be the same - // as the value used by OVERLAY frame - 144, // INTNL_OVERLAY_UPDATE - 128 // INTNL_ARF_UPDATE -}; - -int av1_compute_rd_mult(const AV1_COMP *cpi, int qindex) { - const int64_t q = - av1_dc_quant_Q3(qindex, 0, cpi->common.seq_params.bit_depth); - int64_t rdmult = 0; - switch (cpi->common.seq_params.bit_depth) { - case AOM_BITS_8: rdmult = 88 * q * q / 24; break; - case AOM_BITS_10: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4); break; - case AOM_BITS_12: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8); break; - default: - assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12"); - return -1; - } - if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) { - const GF_GROUP *const gf_group = &cpi->twopass.gf_group; - const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index]; - const int boost_index = AOMMIN(15, (cpi->rc.gfu_boost / 100)); - - rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7; - rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7); - } - if (rdmult < 1) rdmult = 1; - return (int)rdmult; -} - -static int compute_rd_thresh_factor(int qindex, aom_bit_depth_t bit_depth) { - double q; - switch (bit_depth) { - case AOM_BITS_8: q = av1_dc_quant_Q3(qindex, 0, AOM_BITS_8) / 4.0; break; - case AOM_BITS_10: q = av1_dc_quant_Q3(qindex, 0, AOM_BITS_10) / 16.0; break; - case AOM_BITS_12: q = av1_dc_quant_Q3(qindex, 0, AOM_BITS_12) / 64.0; break; - default: - assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12"); - return -1; - } - // TODO(debargha): Adjust the function below. 
- return AOMMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8); -} - -void av1_initialize_me_consts(const AV1_COMP *cpi, MACROBLOCK *x, int qindex) { - switch (cpi->common.seq_params.bit_depth) { - case AOM_BITS_8: - x->sadperbit16 = sad_per_bit16lut_8[qindex]; - x->sadperbit4 = sad_per_bit4lut_8[qindex]; - break; - case AOM_BITS_10: - x->sadperbit16 = sad_per_bit16lut_10[qindex]; - x->sadperbit4 = sad_per_bit4lut_10[qindex]; - break; - case AOM_BITS_12: - x->sadperbit16 = sad_per_bit16lut_12[qindex]; - x->sadperbit4 = sad_per_bit4lut_12[qindex]; - break; - default: - assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12"); - } -} - -static void set_block_thresholds(const AV1_COMMON *cm, RD_OPT *rd) { - int i, bsize, segment_id; - - for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) { - const int qindex = - clamp(av1_get_qindex(&cm->seg, segment_id, cm->base_qindex) + - cm->y_dc_delta_q, - 0, MAXQ); - const int q = compute_rd_thresh_factor(qindex, cm->seq_params.bit_depth); - - for (bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) { - // Threshold here seems unnecessarily harsh but fine given actual - // range of values used for cpi->sf.thresh_mult[]. - const int t = q * rd_thresh_block_size_factor[bsize]; - const int thresh_max = INT_MAX / t; - - for (i = 0; i < MAX_MODES; ++i) - rd->threshes[segment_id][bsize][i] = rd->thresh_mult[i] < thresh_max - ? 
rd->thresh_mult[i] * t / 4 - : INT_MAX; - } - } -} - -void av1_set_mvcost(MACROBLOCK *x, int ref, int ref_mv_idx) { - (void)ref; - (void)ref_mv_idx; - x->mvcost = x->mv_cost_stack; - x->nmvjointcost = x->nmv_vec_cost; -} - -void av1_fill_coeff_costs(MACROBLOCK *x, FRAME_CONTEXT *fc, - const int num_planes) { - const int nplanes = AOMMIN(num_planes, PLANE_TYPES); - for (int eob_multi_size = 0; eob_multi_size < 7; ++eob_multi_size) { - for (int plane = 0; plane < nplanes; ++plane) { - LV_MAP_EOB_COST *pcost = &x->eob_costs[eob_multi_size][plane]; - - for (int ctx = 0; ctx < 2; ++ctx) { - aom_cdf_prob *pcdf; - switch (eob_multi_size) { - case 0: pcdf = fc->eob_flag_cdf16[plane][ctx]; break; - case 1: pcdf = fc->eob_flag_cdf32[plane][ctx]; break; - case 2: pcdf = fc->eob_flag_cdf64[plane][ctx]; break; - case 3: pcdf = fc->eob_flag_cdf128[plane][ctx]; break; - case 4: pcdf = fc->eob_flag_cdf256[plane][ctx]; break; - case 5: pcdf = fc->eob_flag_cdf512[plane][ctx]; break; - case 6: - default: pcdf = fc->eob_flag_cdf1024[plane][ctx]; break; - } - av1_cost_tokens_from_cdf(pcost->eob_cost[ctx], pcdf, NULL); - } - } - } - for (int tx_size = 0; tx_size < TX_SIZES; ++tx_size) { - for (int plane = 0; plane < nplanes; ++plane) { - LV_MAP_COEFF_COST *pcost = &x->coeff_costs[tx_size][plane]; - - for (int ctx = 0; ctx < TXB_SKIP_CONTEXTS; ++ctx) - av1_cost_tokens_from_cdf(pcost->txb_skip_cost[ctx], - fc->txb_skip_cdf[tx_size][ctx], NULL); - - for (int ctx = 0; ctx < SIG_COEF_CONTEXTS_EOB; ++ctx) - av1_cost_tokens_from_cdf(pcost->base_eob_cost[ctx], - fc->coeff_base_eob_cdf[tx_size][plane][ctx], - NULL); - for (int ctx = 0; ctx < SIG_COEF_CONTEXTS; ++ctx) - av1_cost_tokens_from_cdf(pcost->base_cost[ctx], - fc->coeff_base_cdf[tx_size][plane][ctx], NULL); - - for (int ctx = 0; ctx < EOB_COEF_CONTEXTS; ++ctx) - av1_cost_tokens_from_cdf(pcost->eob_extra_cost[ctx], - fc->eob_extra_cdf[tx_size][plane][ctx], NULL); - - for (int ctx = 0; ctx < DC_SIGN_CONTEXTS; ++ctx) - 
av1_cost_tokens_from_cdf(pcost->dc_sign_cost[ctx], - fc->dc_sign_cdf[plane][ctx], NULL); - - for (int ctx = 0; ctx < LEVEL_CONTEXTS; ++ctx) { - int br_rate[BR_CDF_SIZE]; - int prev_cost = 0; - int i, j; - av1_cost_tokens_from_cdf(br_rate, fc->coeff_br_cdf[tx_size][plane][ctx], - NULL); - // printf("br_rate: "); - // for(j = 0; j < BR_CDF_SIZE; j++) - // printf("%4d ", br_rate[j]); - // printf("\n"); - for (i = 0; i < COEFF_BASE_RANGE; i += BR_CDF_SIZE - 1) { - for (j = 0; j < BR_CDF_SIZE - 1; j++) { - pcost->lps_cost[ctx][i + j] = prev_cost + br_rate[j]; - } - prev_cost += br_rate[j]; - } - pcost->lps_cost[ctx][i] = prev_cost; - // printf("lps_cost: %d %d %2d : ", tx_size, plane, ctx); - // for (i = 0; i <= COEFF_BASE_RANGE; i++) - // printf("%5d ", pcost->lps_cost[ctx][i]); - // printf("\n"); - } - } - } -} - -void av1_initialize_rd_consts(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - MACROBLOCK *const x = &cpi->td.mb; - RD_OPT *const rd = &cpi->rd; - - aom_clear_system_state(); - - rd->RDMULT = av1_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q); - - set_error_per_bit(x, rd->RDMULT); - - set_block_thresholds(cm, rd); - - if (cm->cur_frame_force_integer_mv) { - av1_build_nmv_cost_table(x->nmv_vec_cost, x->nmvcost, &cm->fc->nmvc, - MV_SUBPEL_NONE); - } else { - av1_build_nmv_cost_table( - x->nmv_vec_cost, - cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost, &cm->fc->nmvc, - cm->allow_high_precision_mv); - } - - x->mvcost = x->mv_cost_stack; - x->nmvjointcost = x->nmv_vec_cost; - - if (frame_is_intra_only(cm) && cm->allow_screen_content_tools && - cpi->oxcf.pass != 1) { - int *dvcost[2] = { &cpi->dv_cost[0][MV_MAX], &cpi->dv_cost[1][MV_MAX] }; - av1_build_nmv_cost_table(cpi->dv_joint_cost, dvcost, &cm->fc->ndvc, - MV_SUBPEL_NONE); - } - - if (cpi->oxcf.pass != 1) { - for (int i = 0; i < TRANS_TYPES; ++i) - // IDENTITY: 1 bit - // TRANSLATION: 3 bits - // ROTZOOM: 2 bits - // AFFINE: 3 bits - cpi->gmtype_cost[i] = (1 + (i > 0 ? 
(i == ROTZOOM ? 1 : 2) : 0)) - << AV1_PROB_COST_SHIFT; - } -} - -static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) { - // NOTE: The tables below must be of the same size. - - // The functions described below are sampled at the four most significant - // bits of x^2 + 8 / 256. - - // Normalized rate: - // This table models the rate for a Laplacian source with given variance - // when quantized with a uniform quantizer with given stepsize. The - // closed form expression is: - // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)], - // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance), - // and H(x) is the binary entropy function. - static const int rate_tab_q10[] = { - 65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142, - 4044, 3958, 3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186, - 3133, 3037, 2952, 2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353, - 2290, 2232, 2179, 2130, 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651, - 1608, 1530, 1460, 1398, 1342, 1290, 1243, 1199, 1159, 1086, 1021, 963, - 911, 864, 821, 781, 745, 680, 623, 574, 530, 490, 455, 424, - 395, 345, 304, 269, 239, 213, 190, 171, 154, 126, 104, 87, - 73, 61, 52, 44, 38, 28, 21, 16, 12, 10, 8, 6, - 5, 3, 2, 1, 1, 1, 0, 0, - }; - // Normalized distortion: - // This table models the normalized distortion for a Laplacian source - // with given variance when quantized with a uniform quantizer - // with given stepsize. The closed form expression is: - // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2)) - // where x = qpstep / sqrt(variance). - // Note the actual distortion is Dn * variance. 
- static const int dist_tab_q10[] = { - 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5, - 5, 6, 7, 7, 8, 9, 11, 12, 13, 15, 16, 17, - 18, 21, 24, 26, 29, 31, 34, 36, 39, 44, 49, 54, - 59, 64, 69, 73, 78, 88, 97, 106, 115, 124, 133, 142, - 151, 167, 184, 200, 215, 231, 245, 260, 274, 301, 327, 351, - 375, 397, 418, 439, 458, 495, 528, 559, 587, 613, 637, 659, - 680, 717, 749, 777, 801, 823, 842, 859, 874, 899, 919, 936, - 949, 960, 969, 977, 983, 994, 1001, 1006, 1010, 1013, 1015, 1017, - 1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024, - }; - static const int xsq_iq_q10[] = { - 0, 4, 8, 12, 16, 20, 24, 28, 32, - 40, 48, 56, 64, 72, 80, 88, 96, 112, - 128, 144, 160, 176, 192, 208, 224, 256, 288, - 320, 352, 384, 416, 448, 480, 544, 608, 672, - 736, 800, 864, 928, 992, 1120, 1248, 1376, 1504, - 1632, 1760, 1888, 2016, 2272, 2528, 2784, 3040, 3296, - 3552, 3808, 4064, 4576, 5088, 5600, 6112, 6624, 7136, - 7648, 8160, 9184, 10208, 11232, 12256, 13280, 14304, 15328, - 16352, 18400, 20448, 22496, 24544, 26592, 28640, 30688, 32736, - 36832, 40928, 45024, 49120, 53216, 57312, 61408, 65504, 73696, - 81888, 90080, 98272, 106464, 114656, 122848, 131040, 147424, 163808, - 180192, 196576, 212960, 229344, 245728, - }; - const int tmp = (xsq_q10 >> 2) + 8; - const int k = get_msb(tmp) - 3; - const int xq = (k << 3) + ((tmp >> k) & 0x7); - const int one_q10 = 1 << 10; - const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k); - const int b_q10 = one_q10 - a_q10; - *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10; - *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10; -} - -void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n_log2, - unsigned int qstep, int *rate, - int64_t *dist) { - // This function models the rate and distortion for a Laplacian - // source with given variance when quantized with a uniform quantizer - // with given stepsize. 
The closed form expressions are in: - // Hang and Chen, "Source Model for transform video coder and its - // application - Part I: Fundamental Theory", IEEE Trans. Circ. - // Sys. for Video Tech., April 1997. - if (var == 0) { - *rate = 0; - *dist = 0; - } else { - int d_q10, r_q10; - static const uint32_t MAX_XSQ_Q10 = 245727; - const uint64_t xsq_q10_64 = - (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var; - const int xsq_q10 = (int)AOMMIN(xsq_q10_64, MAX_XSQ_Q10); - model_rd_norm(xsq_q10, &r_q10, &d_q10); - *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - AV1_PROB_COST_SHIFT); - *dist = (var * (int64_t)d_q10 + 512) >> 10; - } -} - -static double interp_cubic(const double *p, double x) { - return p[1] + 0.5 * x * - (p[2] - p[0] + - x * (2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3] + - x * (3.0 * (p[1] - p[2]) + p[3] - p[0]))); -} - -static double interp_bicubic(const double *p, int p_stride, double x, - double y) { - double q[4]; - q[0] = interp_cubic(p, x); - q[1] = interp_cubic(p + p_stride, x); - q[2] = interp_cubic(p + 2 * p_stride, x); - q[3] = interp_cubic(p + 3 * p_stride, x); - return interp_cubic(q, y); -} - -static const double interp_rgrid_surf[65 * 18] = { - 0.104019, 0.245714, 0.293686, 0.358635, 0.382167, 0.412446, - 0.419955, 0.421388, 0.426672, 0.427990, 0.428531, 0.456868, - 0.569880, 0.638822, 1.016319, 2.143453, 3.565229, 4.720880, - 0.124618, 0.294211, 0.352023, 0.429991, 0.458206, 0.494510, - 0.503513, 0.505232, 0.511566, 0.513234, 0.519365, 0.570225, - 0.697373, 0.840624, 1.462198, 3.289054, 6.256517, 6.852788, - 0.118630, 0.269669, 0.346620, 0.430999, 0.459385, 0.495783, - 0.504808, 0.506532, 0.512884, 0.514988, 0.543437, 0.662772, - 0.795876, 1.313596, 2.403841, 4.163098, 7.440589, 8.616275, - 0.093329, 0.168205, 0.321320, 0.430607, 0.459385, 0.495783, - 0.504813, 0.506548, 0.512975, 0.520662, 0.571659, 0.701841, - 1.010727, 2.138851, 3.460626, 6.317955, 10.098127, 14.418553, - 0.087021, 0.142905, 0.315011, 0.430509, 
0.459385, 0.495787, - 0.505075, 0.507599, 0.513584, 0.543182, 0.669941, 0.825620, - 1.362800, 2.572187, 4.205047, 7.498399, 12.303118, 16.641735, - 0.086923, 0.142513, 0.314913, 0.430508, 0.459385, 0.495803, - 0.506126, 0.511816, 0.514810, 0.549705, 0.725350, 1.127334, - 2.168597, 3.463686, 6.318605, 10.162284, 18.556041, 19.847042, - 0.086923, 0.142513, 0.314913, 0.430506, 0.459376, 0.495805, - 0.506388, 0.512954, 0.520772, 0.580215, 0.810474, 1.391548, - 2.579442, 4.205160, 7.498399, 12.381597, 21.703618, 24.015457, - 0.086923, 0.142513, 0.314911, 0.430353, 0.458765, 0.495652, - 0.506391, 0.513406, 0.544098, 0.702950, 1.121860, 2.168961, - 3.463798, 6.318607, 10.162284, 18.685361, 28.188192, 37.638872, - 0.086923, 0.142513, 0.314901, 0.429742, 0.456313, 0.495045, - 0.506484, 0.519195, 0.580104, 0.810126, 1.391462, 2.579441, - 4.205160, 7.498399, 12.381597, 21.848607, 33.367199, 42.623190, - 0.086923, 0.142513, 0.314899, 0.429589, 0.455706, 0.495155, - 0.507882, 0.542426, 0.702360, 1.119921, 2.168478, 3.463791, - 6.318607, 10.162284, 18.685361, 28.345760, 47.802028, 49.163533, - 0.086924, 0.142548, 0.315086, 0.429842, 0.455870, 0.496336, - 0.512412, 0.556953, 0.773373, 1.266396, 2.548277, 4.204676, - 7.498399, 12.381597, 21.848607, 33.548250, 54.301011, 56.262859, - 0.087067, 0.144957, 0.327436, 0.446616, 0.466362, 0.505706, - 0.522077, 0.610747, 0.972543, 1.666916, 3.338812, 6.316669, - 10.162284, 18.685361, 28.345760, 48.065311, 66.145302, 78.396020, - 0.094295, 0.164235, 0.393722, 0.534219, 0.530922, 0.579308, - 0.603889, 0.760870, 1.229961, 2.423214, 4.173513, 7.497916, - 12.381597, 21.848607, 33.548250, 54.589585, 74.875848, 86.468182, - 0.124096, 0.213005, 0.497188, 0.665176, 0.685973, 0.800200, - 0.911394, 1.077971, 1.677290, 3.332129, 6.314960, 10.162257, - 18.685361, 28.345760, 48.065311, 66.453506, 98.275189, 96.862588, - 0.140999, 0.270140, 0.658212, 0.867661, 0.970183, 1.149516, - 1.480599, 1.664833, 2.421893, 3.857981, 7.418830, 12.380371, - 
21.848607, 33.548250, 54.589585, 75.188867, 106.657971, 99.762997, - 0.178353, 0.398001, 0.988462, 1.241473, 1.340967, 1.713568, - 2.335030, 2.701432, 3.348532, 5.077158, 9.829903, 18.676528, - 28.345700, 48.065311, 66.453506, 98.588283, 117.057193, 101.130722, - 0.281079, 0.548300, 1.395825, 1.780770, 2.000508, 2.702964, - 3.638454, 4.573843, 5.051641, 7.079129, 11.293332, 21.594861, - 33.544335, 54.589585, 75.188867, 106.971065, 119.957601, 101.466632, - 0.476762, 0.842189, 2.019678, 2.723895, 3.188467, 4.011610, - 5.545111, 7.508984, 8.176339, 9.774504, 14.720782, 27.334416, - 48.049609, 66.453506, 98.588283, 117.370357, 121.329855, 101.509242, - 0.993999, 1.520111, 3.013605, 4.203530, 4.982992, 6.074944, - 8.583581, 11.818375, 14.192544, 14.937517, 21.258160, 33.305953, - 54.585735, 75.188867, 106.971135, 120.279824, 121.976055, 102.690130, - 1.776487, 2.613655, 4.356487, 6.161726, 7.622196, 9.464193, - 13.077233, 18.051656, 23.221051, 24.080068, 30.085038, 48.345269, - 66.457698, 98.588353, 117.379415, 121.976128, 124.356210, 107.713202, - 3.191085, 4.495201, 5.686033, 8.365566, 11.275339, 14.706437, - 20.300969, 28.152237, 35.688355, 39.341382, 41.030743, 55.752262, - 75.211764, 106.980285, 120.608403, 124.680746, 130.222528, 112.260098, - 6.136611, 7.305215, 7.272532, 10.646713, 15.630815, 22.383168, - 31.349131, 42.419822, 52.301680, 58.983454, 58.915405, 69.161305, - 98.992460, 117.713855, 124.344836, 130.623638, 138.442401, 127.846670, - 11.707980, 13.490761, 11.640845, 14.176132, 22.131124, 33.776462, - 47.365711, 61.603834, 75.281056, 83.463985, 85.510533, 86.026513, - 108.787480, 123.031136, 130.607284, 138.954406, 160.867784, 158.958882, - 27.062874, 32.195139, 24.147297, 22.114632, 35.580506, 52.551674, - 71.652956, 88.606776, 102.107193, 110.703186, 114.398733, 111.118539, - 121.503578, 132.455924, 139.490806, 161.412674, 193.563210, 172.203945, - 35.625692, 47.953028, 42.639820, 42.276254, 58.815664, 84.977282, - 110.656412, 126.168446, 134.658126, 
140.604482, 144.006012, 141.702382, - 140.125323, 153.122630, 164.748041, 194.156197, 206.854650, 174.013079, - 49.516447, 65.335381, 71.738306, 81.872819, 98.400740, 136.840488, - 163.775802, 169.440078, 172.747876, 171.222919, 171.679604, 172.173550, - 168.200129, 187.617133, 199.683394, 207.768200, 210.062520, 175.478356, - 60.341673, 92.487135, 119.907299, 136.068010, 144.778950, 189.443534, - 220.120077, 219.641635, 214.616503, 205.894657, 198.453924, 200.013069, - 195.938103, 206.118661, 210.447375, 212.061379, 216.078218, 181.162805, - 78.422159, 112.242899, 158.416312, 181.404320, 193.188690, 229.296967, - 270.461799, 275.168977, 256.511701, 244.706786, 231.344608, 226.065087, - 222.248618, 218.662324, 217.966722, 218.248574, 218.818588, 182.740573, - 88.713664, 123.594164, 172.928179, 213.781414, 245.800351, 252.063414, - 313.283141, 331.703831, 305.866639, 285.177142, 269.759635, 251.988739, - 245.998388, 232.688076, 230.588702, 230.882657, 230.319053, 192.120741, - 102.540561, 152.905927, 189.137131, 241.806756, 273.868497, 284.258017, - 339.689853, 373.561104, 362.657463, 326.291984, 311.922687, 290.460189, - 276.774381, 273.012072, 277.751792, 279.123748, 278.820447, 233.813798, - 132.983118, 176.307242, 197.415684, 243.307787, 280.893995, 332.922370, - 340.329043, 404.530166, 419.475405, 375.775209, 351.300889, 340.042759, - 315.683832, 306.123530, 306.359319, 306.733063, 307.609556, 261.647847, - 149.579109, 185.925581, 207.937033, 245.159084, 301.890957, 350.040480, - 352.250771, 418.742329, 458.112686, 430.125208, 386.460441, 380.346839, - 354.679150, 337.305620, 334.504124, 335.889932, 341.060725, 286.898578, - 153.576812, 202.105624, 219.366967, 248.524506, 314.255692, 350.607526, - 390.567688, 408.629209, 488.000213, 480.563823, 432.461799, 410.412624, - 398.607371, 400.188740, 402.780916, 408.853470, 430.449735, 363.777088, - 161.353129, 214.848904, 231.549852, 258.536466, 313.163177, 368.140577, - 412.136393, 413.409032, 499.838438, 
519.571063, 485.833867, 444.562715, - 435.738129, 442.358549, 450.166531, 453.208524, 458.424358, 385.823139, - 175.109034, 227.608058, 250.069563, 286.101747, 312.256740, 378.421485, - 413.344147, 435.058646, 476.960941, 542.448886, 530.189154, 495.408402, - 475.326752, 465.017144, 464.694045, 465.144689, 466.905382, 398.669138, - 184.750180, 240.766694, 283.240772, 305.480150, 322.409001, 374.526162, - 427.141326, 452.840323, 472.604139, 545.366105, 567.676694, 541.666203, - 509.591873, 492.044219, 492.778569, 493.765684, 493.235693, 413.684325, - 194.728357, 254.928927, 289.991157, 300.193195, 324.194589, 371.563147, - 439.226438, 468.295088, 495.654854, 533.506353, 587.476353, 578.298989, - 548.041942, 527.393885, 538.965146, 545.070442, 544.295454, 454.012211, - 205.195287, 283.135677, 297.921431, 319.295927, 355.621830, 392.466463, - 446.696167, 485.053519, 516.426615, 532.264584, 588.481600, 615.906737, - 589.319634, 555.754316, 558.389367, 569.094521, 569.779764, 475.384946, - 218.552054, 298.511016, 319.188338, 351.781666, 372.789510, 412.827434, - 464.569387, 506.270203, 533.049810, 553.347364, 580.644599, 632.759854, - 622.235843, 569.960552, 580.799340, 586.553714, 579.488366, 491.826482, - 244.803348, 299.790203, 324.187975, 363.280782, 403.710443, 441.724083, - 492.732682, 534.722691, 552.193622, 575.112647, 586.097705, 635.224970, - 644.642944, 606.017786, 640.321218, 642.316989, 616.397020, 548.300111, - 256.957358, 318.638991, 355.063346, 389.889307, 433.607315, 468.209001, - 515.178157, 573.556591, 578.113115, 587.246475, 601.762801, 638.454644, - 656.574853, 641.184609, 676.908189, 684.198162, 678.387412, 574.805864, - 251.211502, 323.448532, 364.227424, 411.792704, 462.226488, 503.572288, - 549.299249, 599.124071, 601.227977, 597.118176, 613.247552, 633.278532, - 658.074755, 664.930719, 685.731531, 693.632845, 693.076350, 578.326477, - 267.695377, 354.273736, 389.976833, 438.518178, 493.332686, 544.343027, - 588.895829, 620.206193, 628.327410, 
606.067827, 620.998532, 657.985256, - 683.936059, 691.345257, 693.894723, 695.175306, 693.618786, 578.517148, - 274.290725, 363.465288, 411.808596, 463.369805, 515.310226, 581.009306, - 613.070738, 636.638714, 647.333929, 629.867603, 644.646319, 687.796202, - 702.859596, 713.495479, 704.068069, 704.991807, 704.188594, 587.283658, - 302.538449, 389.174737, 438.518422, 493.398902, 547.662399, 601.981814, - 624.773046, 641.629484, 644.699451, 645.848784, 668.033340, 703.643523, - 707.422408, 717.329600, 726.298973, 744.127507, 745.365167, 617.954068, - 310.328188, 410.984766, 463.369805, 515.315010, 581.309832, 613.787792, - 634.988538, 654.145284, 662.632978, 668.413496, 706.494057, 750.545471, - 730.724808, 730.002100, 743.625262, 750.801609, 745.308457, 606.505800, - 329.948756, 437.600191, 493.398902, 547.661910, 601.917884, 622.557745, - 633.244395, 644.055898, 648.224221, 665.062911, 763.555733, 812.391078, - 769.063582, 744.865168, 727.579796, 724.950408, 722.179707, 598.564510, - 350.848328, 462.437458, 515.315010, 581.309823, 613.779123, 634.465309, - 652.056257, 662.179143, 671.466297, 726.881256, 819.824030, 880.232789, - 810.371672, 754.246481, 725.053473, 724.253390, 723.503395, 603.394909, - 373.704088, 492.408266, 547.661910, 601.917884, 622.557620, 633.236320, - 644.023513, 648.232514, 666.381639, 785.498283, 929.441612, 999.772800, - 890.339033, 775.852504, 731.840181, 726.905100, 725.251844, 604.899901, - 394.473422, 514.261306, 581.309823, 613.779123, 634.465309, 652.056257, - 662.179143, 671.466557, 727.134512, 835.764144, 981.747089, 1018.462934, - 939.686967, 811.276731, 739.398459, 727.365647, 725.285425, 604.923525, - 419.976505, 546.538939, 601.917884, 622.557620, 633.236320, 644.023513, - 648.232514, 666.381639, 785.545191, 932.841398, 1036.609617, 1026.945092, - 963.822765, 840.827315, 755.532423, 730.241865, 725.366847, 604.924155, - 437.281359, 580.116337, 613.779123, 634.465309, 652.056257, 662.179143, - 671.466557, 727.134512, 
835.764859, 981.996194, 1031.896881, 1002.544732, - 881.157178, 828.151494, 799.340975, 751.314325, 728.316587, 605.005504, - 464.713920, 600.649281, 622.557620, 633.236320, 644.023513, 648.232514, - 666.381639, 785.545191, 932.841398, 1036.735329, 1035.037004, 995.478339, - 858.093733, 823.471976, 819.881754, 798.749289, 749.440463, 607.955244, - 495.880237, 612.473139, 634.465309, 652.056257, 662.179143, 671.466557, - 727.134512, 835.764859, 981.996194, 1032.339788, 1031.105117, 995.303259, - 857.733663, 823.435877, 822.822791, 819.873050, 796.882480, 629.038445, - 510.391280, 621.158273, 633.236320, 644.023513, 648.232514, 666.381639, - 785.545191, 932.841398, 1036.735329, 1035.566013, 1029.599350, 994.926093, - 857.645648, 823.435143, 822.904139, 822.822791, 817.965681, 673.856962, - 514.588176, 632.947715, 652.056257, 662.179143, 671.466557, 727.134512, - 835.764859, 981.996194, 1032.339788, 1031.547475, 1023.835377, 972.158629, - 851.968626, 823.347128, 822.904770, 822.904139, 820.752301, 684.418900, - 520.013294, 631.668183, 644.023513, 648.232514, 666.381639, 785.545191, - 932.841398, 1036.735329, 1035.567378, 1029.776746, 1001.044108, 880.853721, - 829.201546, 822.994150, 822.904770, 822.904770, 820.792975, 684.582020, - 531.253628, 650.479606, 662.179143, 671.466557, 727.134512, 835.764859, - 981.996194, 1032.339788, 1031.636855, 1029.601779, 995.366703, 858.086641, - 823.524524, 822.906135, 822.904770, 822.904770, 820.792975, 684.582020, - 528.531744, 642.424501, 648.232514, 666.381639, 785.545191, 932.841398, - 1036.735329, 1035.567378, 1030.219103, 1029.576226, 995.278687, 857.733663, - 823.436508, 822.904770, 822.904770, 822.904770, 820.792975, 684.582020, - 545.401164, 660.550678, 671.508859, 727.304161, 835.807162, 981.996850, - 1032.339788, 1031.636855, 1030.130788, 1029.487827, 994.925709, 857.645648, - 823.435143, 822.904770, 822.904770, 822.904770, 820.792975, 684.582020, - 537.684760, 646.650947, 669.110131, 796.487512, 935.569890, 1036.777631, 
- 1035.567378, 1030.219103, 1030.018584, 1023.810805, 972.158629, 851.968626, - 823.347128, 822.904770, 822.904770, 822.904770, 820.792975, 684.582020, - 552.408370, 670.001885, 738.246482, 879.690154, 992.939171, 1032.509436, - 1031.636855, 1030.132153, 1029.665223, 1001.043724, 880.853721, 829.201546, - 822.994150, 822.904770, 822.904770, 822.904770, 820.792975, 684.582020, - 539.835902, 667.496388, 799.216004, 946.512211, 1039.506123, 1035.609680, - 1030.219103, 1030.107964, 1029.577207, 995.366703, 858.086641, 823.524524, - 822.906135, 822.904770, 822.904770, 822.904770, 820.792975, 684.582020, - 558.362529, 734.277451, 877.197218, 990.478243, 1029.908393, 1028.993978, - 1027.488620, 1027.464048, 1026.933674, 992.724534, 855.532488, 821.323349, - 820.792975, 820.792975, 820.792975, 820.792975, 818.686600, 682.825198, - 453.127195, 649.075095, 780.278390, 867.165890, 862.469711, 857.067460, - 856.956321, 856.955937, 856.513579, 827.981461, 713.556496, 685.024378, - 684.582020, 684.582020, 684.582020, 684.582020, 682.825198, 569.510056, -}; - -static const double interp_dgrid_surf[65 * 18] = { - 10.650434, 12.204694, 12.040917, 11.843008, 11.845578, 12.051535, 12.103583, - 12.136780, 12.266709, 12.299107, 12.299673, 12.303120, 12.316337, 12.293431, - 12.092165, 11.602421, 11.141559, 8.864495, 12.770003, 14.634889, 14.437149, - 14.199413, 14.202487, 14.449423, 14.511827, 14.551629, 14.707410, 14.746265, - 14.747610, 14.753705, 14.762194, 14.699395, 14.390525, 13.690970, 12.874168, - 10.367121, 12.832328, 14.790730, 14.503765, 14.236403, 14.239028, 14.486600, - 14.549164, 14.589069, 14.745250, 14.784258, 14.788320, 14.801930, 14.762798, - 14.499088, 14.021544, 13.469684, 12.661560, 10.108384, 12.950520, 15.264726, - 14.621957, 14.238236, 14.239028, 14.486601, 14.549264, 14.589469, 14.745361, - 14.784949, 14.791572, 14.798652, 14.660251, 14.119394, 13.651131, 12.935657, - 12.176082, 9.228999, 12.979992, 15.382918, 14.651428, 14.238693, 14.239028, - 14.486701, 
14.555710, 14.615321, 14.751849, 14.787700, 14.797104, 14.743189, - 14.475057, 13.944406, 13.450468, 12.687876, 11.824993, 8.906683, 12.980449, - 15.384750, 14.651885, 14.238700, 14.239028, 14.487102, 14.581562, 14.718998, - 14.777721, 14.788445, 14.778661, 14.582790, 14.099785, 13.649637, 12.935359, - 12.201859, 10.891931, 8.482221, 12.980449, 15.384750, 14.651886, 14.238801, - 14.239434, 14.487303, 14.588010, 14.744860, 14.784773, 14.786094, 14.735647, - 14.455704, 13.939591, 13.450393, 12.687876, 11.849334, 10.476658, 8.043672, - 12.980449, 15.384750, 14.651987, 14.245320, 14.265579, 14.493824, 14.588211, - 14.745312, 14.787263, 14.775934, 14.582036, 14.099475, 13.649563, 12.935358, - 12.201859, 10.911285, 9.730570, 6.696921, 12.980449, 15.384750, 14.652393, - 14.271466, 14.370434, 14.520069, 14.589027, 14.746028, 14.785482, 14.735605, - 14.455693, 13.939590, 13.450393, 12.687876, 11.849334, 10.494514, 9.195398, - 6.215460, 12.980449, 15.384750, 14.652494, 14.277985, 14.396679, 14.533035, - 14.615021, 14.754825, 14.775610, 14.582796, 14.099664, 13.649565, 12.935358, - 12.201859, 10.911285, 9.747361, 7.779960, 5.617541, 12.980448, 15.384731, - 14.652415, 14.278078, 14.397578, 14.559053, 14.718657, 14.776398, 14.747044, - 14.504690, 13.951810, 13.450583, 12.687876, 11.849334, 10.494514, 9.210817, - 7.210003, 5.164575, 12.980446, 15.383448, 14.647073, 14.277541, 14.403813, - 14.569546, 14.744956, 14.765103, 14.629073, 14.296161, 13.698573, 12.936118, - 12.201859, 10.911285, 9.747361, 7.790897, 6.322998, 3.931551, 12.981550, - 15.376916, 14.615597, 14.274820, 14.437479, 14.575942, 14.707492, 14.734111, - 14.515975, 14.000806, 13.462803, 12.688066, 11.849334, 10.494514, 9.210817, - 7.219566, 5.781392, 3.486081, 12.991899, 15.376201, 14.579444, 14.296898, - 14.473361, 14.522910, 14.491600, 14.543267, 14.288580, 13.700311, 12.936579, - 12.201867, 10.911285, 9.747361, 7.790897, 6.331506, 4.480348, 2.923138, - 13.019848, 15.383477, 14.582260, 14.385262, 14.452673, 
14.436019, 14.238174, - 14.255993, 13.977481, 13.532342, 12.705591, 11.849605, 10.494514, 9.210817, - 7.219566, 5.789642, 4.018194, 2.766222, 13.028558, 15.315782, 14.439141, - 14.326286, 14.452429, 14.311731, 14.033235, 13.922587, 13.665868, 13.207897, - 12.274375, 10.912967, 9.747371, 7.790897, 6.331506, 4.488594, 3.454993, - 2.692682, 12.992752, 15.321471, 14.409573, 14.236340, 14.322969, 14.049072, - 13.764823, 13.479242, 13.250105, 12.759133, 12.019174, 10.532951, 9.211409, - 7.219566, 5.789642, 4.026440, 3.298077, 2.674624, 12.945493, 15.276596, - 14.315745, 14.026198, 14.085774, 13.844563, 13.447576, 12.964935, 12.735525, - 12.288592, 11.511693, 9.900227, 7.793270, 6.331506, 4.488594, 3.463236, - 3.224318, 2.672433, 12.757570, 15.056661, 14.095011, 13.722362, 13.812624, - 13.608480, 13.021206, 12.367627, 11.937931, 11.581049, 10.599552, 9.247860, - 7.220151, 5.789642, 4.026437, 3.305882, 3.191260, 2.615317, 12.581293, - 14.824658, 13.909074, 13.496158, 13.491402, 13.221550, 12.514140, 11.677229, - 10.936895, 10.619912, 9.634779, 7.763570, 6.331082, 4.488590, 3.462798, - 3.216460, 3.076315, 2.373499, 12.283499, 14.455760, 13.890593, 13.427587, - 13.183783, 12.763833, 11.861006, 10.740618, 9.820756, 9.354945, 8.669862, - 7.123268, 5.787860, 4.025994, 3.290000, 3.084410, 2.810905, 2.222916, - 12.010893, 14.300919, 13.986624, 13.484026, 13.025385, 12.224281, 11.064265, - 9.631040, 8.594396, 8.003736, 7.561587, 6.274418, 4.466637, 3.446574, - 3.102467, 2.816989, 2.598688, 1.951541, 11.581477, 13.831132, 13.632027, - 13.380414, 12.807880, 11.665651, 10.218236, 8.562237, 7.222614, 6.611808, - 6.261676, 5.402793, 3.938544, 3.174375, 2.818166, 2.602758, 2.213911, - 1.434763, 11.050735, 12.893449, 12.363152, 12.712829, 12.012961, 10.887854, - 9.109699, 7.421701, 5.965603, 5.272129, 4.991435, 4.423000, 3.369988, - 2.800371, 2.593901, 2.217431, 1.670917, 1.215265, 10.641194, 11.766277, - 10.777082, 10.972917, 10.689298, 9.701545, 7.719947, 6.145654, 4.872442, - 
4.099600, 3.880934, 3.514159, 2.786474, 2.368963, 2.162376, 1.673670, - 1.450770, 1.185424, 10.071964, 11.107701, 9.172361, 8.551313, 8.412080, - 7.641397, 6.174246, 4.853916, 3.904549, 3.246810, 2.959903, 2.785066, - 2.240001, 1.793166, 1.585520, 1.449824, 1.405368, 1.168856, 9.213182, - 9.173278, 7.219231, 6.242951, 5.626013, 5.768007, 4.908666, 3.809589, - 3.115109, 2.617899, 2.274793, 2.172960, 1.838597, 1.505915, 1.414333, - 1.392666, 1.338173, 1.105611, 7.365015, 7.471370, 5.622346, 4.520127, - 3.936272, 4.208822, 3.623024, 2.977794, 2.450003, 2.097261, 1.824090, - 1.643270, 1.473525, 1.351388, 1.327504, 1.323865, 1.307894, 1.088234, - 6.198210, 6.580712, 4.682511, 3.416952, 2.941929, 2.766637, 2.650686, - 2.315439, 1.925838, 1.659784, 1.464419, 1.252806, 1.162722, 1.197518, - 1.199875, 1.197365, 1.194040, 0.995797, 5.402507, 5.055466, 3.728724, - 2.624359, 2.165810, 1.943189, 1.918190, 1.738078, 1.516328, 1.290520, - 1.155793, 1.015962, 0.881900, 0.807203, 0.754242, 0.743378, 0.740288, - 0.614158, 3.937867, 3.862507, 2.884664, 2.088147, 1.648496, 1.473584, - 1.340123, 1.291769, 1.165381, 1.000224, 0.893316, 0.821333, 0.691363, - 0.610501, 0.586766, 0.583762, 0.577840, 0.468733, 3.104660, 3.181078, - 2.420208, 1.747442, 1.297956, 1.109835, 0.970385, 0.943229, 0.876923, - 0.777584, 0.678183, 0.628623, 0.553745, 0.523430, 0.519490, 0.514394, - 0.492259, 0.403172, 2.593833, 2.533720, 2.010452, 1.480944, 1.060302, - 0.846383, 0.738703, 0.673144, 0.658010, 0.592449, 0.518236, 0.470335, - 0.425088, 0.393168, 0.378116, 0.355846, 0.275469, 0.213128, 2.176988, - 2.089575, 1.671284, 1.225008, 0.895382, 0.672008, 0.566241, 0.496746, - 0.488005, 0.449874, 0.400899, 0.354002, 0.318150, 0.281533, 0.238545, - 0.224159, 0.202399, 0.160681, 1.874679, 1.769165, 1.430124, 1.068727, - 0.780272, 0.557801, 0.441643, 0.377256, 0.352957, 0.338452, 0.304965, - 0.273172, 0.240052, 0.208724, 0.193431, 0.190845, 0.185025, 0.138166, - 1.590226, 1.502830, 1.193127, 0.917885, 0.670432, 
0.474546, 0.355420, - 0.292305, 0.259035, 0.249937, 0.232079, 0.208943, 0.181936, 0.160038, - 0.152257, 0.151235, 0.149583, 0.120747, 1.331730, 1.255907, 1.012871, - 0.778422, 0.578977, 0.412432, 0.293155, 0.231824, 0.197187, 0.183921, - 0.174876, 0.157252, 0.140263, 0.127050, 0.110244, 0.105041, 0.104323, - 0.086944, 1.153994, 1.118771, 0.822355, 0.612321, 0.478249, 0.348222, - 0.247408, 0.186141, 0.152714, 0.135445, 0.129810, 0.119994, 0.115619, - 0.131626, 0.095612, 0.079343, 0.077502, 0.064550, 0.946317, 0.925894, - 0.677969, 0.499906, 0.397101, 0.297931, 0.214467, 0.152333, 0.120731, - 0.102686, 0.095062, 0.090361, 0.122319, 0.240194, 0.112687, 0.070690, - 0.070461, 0.054194, 0.824155, 0.787241, 0.581856, 0.419228, 0.313167, - 0.245582, 0.183500, 0.128101, 0.096577, 0.080267, 0.071022, 0.066851, - 0.085754, 0.154163, 0.075884, 0.052401, 0.054270, 0.026656, 0.716310, - 0.671378, 0.489580, 0.349569, 0.256155, 0.206343, 0.157853, 0.111950, - 0.079271, 0.062518, 0.053441, 0.049660, 0.051400, 0.063778, 0.039993, - 0.029133, 0.023382, 0.013725, 0.614125, 0.579096, 0.417126, 0.299465, - 0.217849, 0.165515, 0.129040, 0.093127, 0.065612, 0.049543, 0.041429, - 0.036850, 0.034416, 0.033989, 0.024216, 0.017377, 0.014833, 0.011987, - 0.520407, 0.487239, 0.349473, 0.251741, 0.184897, 0.135813, 0.107098, - 0.073607, 0.053938, 0.040531, 0.032931, 0.028876, 0.025759, 0.022168, - 0.016739, 0.014638, 0.014333, 0.011947, 0.449954, 0.415124, 0.299452, - 0.216942, 0.158874, 0.115334, 0.088821, 0.060105, 0.042610, 0.032566, - 0.026903, 0.023123, 0.019913, 0.016835, 0.014306, 0.013625, 0.013535, - 0.011284, 0.377618, 0.347773, 0.251741, 0.184839, 0.132857, 0.095439, - 0.070462, 0.052244, 0.036078, 0.026025, 0.021518, 0.018487, 0.015361, - 0.012905, 0.011470, 0.010569, 0.010283, 0.008297, 0.319953, 0.297976, - 0.216942, 0.158842, 0.113280, 0.080426, 0.057367, 0.041987, 0.030135, - 0.022295, 0.017901, 0.015121, 0.012224, 0.010035, 0.009353, 0.009108, - 0.008695, 0.006139, 0.267864, 
0.250502, 0.184839, 0.132851, 0.095039, - 0.068220, 0.049135, 0.035315, 0.025144, 0.018237, 0.013857, 0.012094, - 0.009715, 0.007743, 0.006937, 0.006446, 0.006243, 0.004929, 0.230449, - 0.215895, 0.158842, 0.113280, 0.080417, 0.057174, 0.041304, 0.029959, - 0.021866, 0.015673, 0.012133, 0.010083, 0.007801, 0.006053, 0.005401, - 0.003834, 0.003429, 0.002851, 0.193984, 0.183963, 0.132851, 0.095039, - 0.068220, 0.049133, 0.035305, 0.025140, 0.018150, 0.013175, 0.010422, - 0.008491, 0.006397, 0.004567, 0.003494, 0.002933, 0.002825, 0.002355, - 0.167298, 0.158088, 0.113280, 0.080417, 0.057174, 0.041304, 0.029959, - 0.021866, 0.015669, 0.011955, 0.009257, 0.007051, 0.005543, 0.003905, - 0.002984, 0.002825, 0.002814, 0.002347, 0.143228, 0.132220, 0.095039, - 0.068220, 0.049133, 0.035305, 0.025140, 0.018150, 0.013174, 0.010394, - 0.008403, 0.006661, 0.005378, 0.003545, 0.002876, 0.002818, 0.002814, - 0.002347, 0.122934, 0.112735, 0.080417, 0.057174, 0.041304, 0.029959, - 0.021866, 0.015669, 0.011955, 0.009258, 0.007182, 0.006012, 0.003762, - 0.002866, 0.002739, 0.002788, 0.002810, 0.002347, 0.101934, 0.094569, - 0.068220, 0.049133, 0.035305, 0.025140, 0.018150, 0.013174, 0.010394, - 0.008405, 0.006797, 0.005845, 0.003333, 0.002703, 0.002695, 0.002723, - 0.002781, 0.002343, 0.086702, 0.080014, 0.057174, 0.041304, 0.029959, - 0.021866, 0.015669, 0.011955, 0.009258, 0.007190, 0.006533, 0.005839, - 0.003326, 0.002700, 0.002690, 0.002694, 0.002716, 0.002314, 0.073040, - 0.067886, 0.049133, 0.035305, 0.025140, 0.018150, 0.013174, 0.010394, - 0.008405, 0.006807, 0.006468, 0.005831, 0.003325, 0.002700, 0.002690, - 0.002690, 0.002687, 0.002253, 0.061685, 0.056890, 0.041304, 0.029959, - 0.021866, 0.015669, 0.011955, 0.009258, 0.007190, 0.006542, 0.006360, - 0.005416, 0.003221, 0.002698, 0.002690, 0.002690, 0.002683, 0.002238, - 0.052465, 0.048894, 0.035305, 0.025140, 0.018150, 0.013174, 0.010394, - 0.008405, 0.006807, 0.006472, 0.005943, 0.003748, 0.002805, 0.002692, - 0.002690, 
0.002690, 0.002683, 0.002238, 0.043838, 0.041101, 0.029959, - 0.021866, 0.015669, 0.011955, 0.009258, 0.007190, 0.006543, 0.006465, - 0.005839, 0.003333, 0.002702, 0.002690, 0.002690, 0.002690, 0.002683, - 0.002238, 0.037824, 0.035133, 0.025140, 0.018150, 0.013174, 0.010394, - 0.008405, 0.006807, 0.006480, 0.006464, 0.005838, 0.003326, 0.002700, - 0.002690, 0.002690, 0.002690, 0.002683, 0.002238, 0.031865, 0.029815, - 0.021866, 0.015668, 0.011955, 0.009258, 0.007190, 0.006543, 0.006475, - 0.006462, 0.005831, 0.003325, 0.002700, 0.002690, 0.002690, 0.002690, - 0.002683, 0.002238, 0.027150, 0.025016, 0.018128, 0.013083, 0.010371, - 0.008405, 0.006807, 0.006480, 0.006472, 0.006359, 0.005416, 0.003221, - 0.002698, 0.002690, 0.002690, 0.002690, 0.002683, 0.002238, 0.023094, - 0.021760, 0.015577, 0.011590, 0.009167, 0.007188, 0.006543, 0.006475, - 0.006466, 0.005943, 0.003748, 0.002805, 0.002692, 0.002690, 0.002690, - 0.002690, 0.002683, 0.002238, 0.019269, 0.018038, 0.013060, 0.010280, - 0.008382, 0.006806, 0.006480, 0.006474, 0.006464, 0.005839, 0.003333, - 0.002702, 0.002690, 0.002690, 0.002690, 0.002690, 0.002683, 0.002238, - 0.016874, 0.015472, 0.011566, 0.009148, 0.007171, 0.006527, 0.006458, - 0.006457, 0.006447, 0.005823, 0.003318, 0.002693, 0.002683, 0.002683, - 0.002683, 0.002683, 0.002676, 0.002232, 0.011968, 0.011056, 0.008762, - 0.007219, 0.005717, 0.005391, 0.005386, 0.005386, 0.005377, 0.004856, - 0.002767, 0.002246, 0.002238, 0.002238, 0.002238, 0.002238, 0.002232, - 0.001862, -}; - -void av1_model_rd_surffit(double xm, double yl, double *rate_f, - double *dist_f) { - const double x_start = -0.5; - const double x_end = 16.5; - const double x_step = 1; - const double y_start = -15.5; - const double y_end = 16.5; - const double y_step = 0.5; - const double epsilon = 1e-6; - const int stride = (int)rint((x_end - x_start) / x_step) + 1; - (void)y_end; - - xm = AOMMAX(xm, x_start + x_step + epsilon); - xm = AOMMIN(xm, x_end - x_step - epsilon); - yl = 
AOMMAX(yl, y_start + y_step + epsilon); - yl = AOMMIN(yl, y_end - y_step - epsilon); - - const double y = (yl - y_start) / y_step; - const double x = (xm - x_start) / x_step; - - const int yi = (int)floor(y); - const int xi = (int)floor(x); - assert(xi > 0); - assert(yi > 0); - - const double yo = y - yi; - const double xo = x - xi; - const double *prate = &interp_rgrid_surf[(yi - 1) * stride + (xi - 1)]; - const double *pdist = &interp_dgrid_surf[(yi - 1) * stride + (xi - 1)]; - *rate_f = interp_bicubic(prate, stride, xo, yo); - *dist_f = interp_bicubic(pdist, stride, xo, yo); -} - -static const double interp_rgrid_curv[65] = { - 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, - 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, - 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 4.759876, - 8.132086, 13.651828, 21.908271, 33.522054, 48.782376, 71.530983, - 106.728649, 151.942795, 199.893011, 242.850965, 283.933923, 322.154203, - 360.684608, 394.801656, 426.879017, 460.234313, 484.103987, 508.261495, - 536.486763, 558.196737, 586.285894, 614.764511, 634.166333, 647.706472, - 658.211478, 681.360407, 701.052141, 727.007310, 768.663973, 804.407660, - 884.627751, 1065.658131, 1238.875214, 1440.185176, 1678.377931, 1962.243390, - 2300.571467, 2702.152072, 3175.775119, 3730.230519, 4374.308184, 5116.798028, - 5966.489961, 6932.173897, 8022.639747, 9246.677424, 10613.076839, -}; - -static const double interp_dgrid_curv[65] = { - 14.604855, 14.604855, 14.604855, 14.604855, 14.604855, 14.604855, 14.604855, - 14.604855, 14.604855, 14.604855, 14.604855, 14.604855, 14.555776, 14.533692, - 14.439920, 14.257791, 13.977230, 13.623229, 13.064884, 12.355411, 11.560773, - 10.728960, 9.861975, 8.643612, 6.916021, 5.154769, 3.734940, 2.680051, - 1.925506, 1.408410, 1.042223, 0.767641, 0.565392, 0.420116, 0.310427, - 0.231711, 0.172999, 0.128293, 0.094992, 0.072171, 0.052972, 0.039354, - 0.029555, 0.022857, 0.016832, 0.013297, 0.000000, 0.000000, 0.000000, - 
0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, - 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, - 0.000000, 0.000000, -}; - -void av1_model_rd_curvfit(double xqr, double *rate_f, double *distbysse_f) { - const double x_start = -15.5; - const double x_end = 16.5; - const double x_step = 0.5; - const double epsilon = 1e-6; - (void)x_end; - - xqr = AOMMAX(xqr, x_start + x_step + epsilon); - xqr = AOMMIN(xqr, x_end - x_step - epsilon); - const double x = (xqr - x_start) / x_step; - const int xi = (int)floor(x); - const double xo = x - xi; - - assert(xi > 0); - - const double *prate = &interp_rgrid_curv[(xi - 1)]; - const double *pdist = &interp_dgrid_curv[(xi - 1)]; - *rate_f = interp_cubic(prate, xo); - *distbysse_f = interp_cubic(pdist, xo); -} - -static void get_entropy_contexts_plane(BLOCK_SIZE plane_bsize, - const struct macroblockd_plane *pd, - ENTROPY_CONTEXT t_above[MAX_MIB_SIZE], - ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) { - const int num_4x4_w = block_size_wide[plane_bsize] >> tx_size_wide_log2[0]; - const int num_4x4_h = block_size_high[plane_bsize] >> tx_size_high_log2[0]; - const ENTROPY_CONTEXT *const above = pd->above_context; - const ENTROPY_CONTEXT *const left = pd->left_context; - - memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w); - memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h); -} - -void av1_get_entropy_contexts(BLOCK_SIZE bsize, - const struct macroblockd_plane *pd, - ENTROPY_CONTEXT t_above[MAX_MIB_SIZE], - ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]) { - const BLOCK_SIZE plane_bsize = - get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); - get_entropy_contexts_plane(plane_bsize, pd, t_above, t_left); -} - -void av1_mv_pred(const AV1_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer, - int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) { - int i; - int zero_seen = 0; - int best_sad = INT_MAX; - int this_sad = INT_MAX; - int max_mv = 0; - uint8_t *src_y_ptr = 
x->plane[0].src.buf; - uint8_t *ref_y_ptr; - MV pred_mv[MAX_MV_REF_CANDIDATES + 1]; - int num_mv_refs = 0; - const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, NONE_FRAME }; - const int_mv ref_mv = - av1_get_ref_mv_from_stack(0, ref_frames, 0, x->mbmi_ext); - const int_mv ref_mv1 = - av1_get_ref_mv_from_stack(0, ref_frames, 1, x->mbmi_ext); - - pred_mv[num_mv_refs++] = ref_mv.as_mv; - if (ref_mv.as_int != ref_mv1.as_int) { - pred_mv[num_mv_refs++] = ref_mv1.as_mv; - } - if (cpi->sf.adaptive_motion_search && block_size < x->max_partition_size) - pred_mv[num_mv_refs++] = x->pred_mv[ref_frame]; - - assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0]))); - - // Get the sad for each candidate reference mv. - for (i = 0; i < num_mv_refs; ++i) { - const MV *this_mv = &pred_mv[i]; - int fp_row, fp_col; - fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3; - fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3; - max_mv = AOMMAX(max_mv, AOMMAX(abs(this_mv->row), abs(this_mv->col)) >> 3); - - if (fp_row == 0 && fp_col == 0 && zero_seen) continue; - zero_seen |= (fp_row == 0 && fp_col == 0); - - ref_y_ptr = &ref_y_buffer[ref_y_stride * fp_row + fp_col]; - // Find sad for current vector. - this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride, - ref_y_ptr, ref_y_stride); - // Note if it is the best so far. - if (this_sad < best_sad) { - best_sad = this_sad; - } - } - - // Note the index of the mv that worked best in the reference list. 
- x->max_mv_context[ref_frame] = max_mv; - x->pred_mv_sad[ref_frame] = best_sad; -} - -void av1_setup_pred_block(const MACROBLOCKD *xd, - struct buf_2d dst[MAX_MB_PLANE], - const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col, - const struct scale_factors *scale, - const struct scale_factors *scale_uv, - const int num_planes) { - int i; - - dst[0].buf = src->y_buffer; - dst[0].stride = src->y_stride; - dst[1].buf = src->u_buffer; - dst[2].buf = src->v_buffer; - dst[1].stride = dst[2].stride = src->uv_stride; - - for (i = 0; i < num_planes; ++i) { - setup_pred_plane(dst + i, xd->mi[0]->sb_type, dst[i].buf, - i ? src->uv_crop_width : src->y_crop_width, - i ? src->uv_crop_height : src->y_crop_height, - dst[i].stride, mi_row, mi_col, i ? scale_uv : scale, - xd->plane[i].subsampling_x, xd->plane[i].subsampling_y); - } -} - -int av1_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block, - int stride) { - const int bw = mi_size_wide_log2[plane_bsize]; - const int y = 4 * (raster_block >> bw); - const int x = 4 * (raster_block & ((1 << bw) - 1)); - return y * stride + x; -} - -int16_t *av1_raster_block_offset_int16(BLOCK_SIZE plane_bsize, int raster_block, - int16_t *base) { - const int stride = block_size_wide[plane_bsize]; - return base + av1_raster_block_offset(plane_bsize, raster_block, stride); -} - -YV12_BUFFER_CONFIG *av1_get_scaled_ref_frame(const AV1_COMP *cpi, - int ref_frame) { - const AV1_COMMON *const cm = &cpi->common; - const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1]; - const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame); - return (scaled_idx != ref_idx && scaled_idx != INVALID_IDX) - ? 
&cm->buffer_pool->frame_bufs[scaled_idx].buf - : NULL; -} - -int av1_get_switchable_rate(const AV1_COMMON *const cm, MACROBLOCK *x, - const MACROBLOCKD *xd) { - if (cm->interp_filter == SWITCHABLE) { - const MB_MODE_INFO *const mbmi = xd->mi[0]; - int inter_filter_cost = 0; - int dir; - - for (dir = 0; dir < 2; ++dir) { - const int ctx = av1_get_pred_context_switchable_interp(xd, dir); - const InterpFilter filter = - av1_extract_interp_filter(mbmi->interp_filters, dir); - inter_filter_cost += x->switchable_interp_costs[ctx][filter]; - } - return SWITCHABLE_INTERP_RATE_FACTOR * inter_filter_cost; - } else { - return 0; - } -} - -void av1_set_rd_speed_thresholds(AV1_COMP *cpi) { - int i; - RD_OPT *const rd = &cpi->rd; - SPEED_FEATURES *const sf = &cpi->sf; - - // Set baseline threshold values. - for (i = 0; i < MAX_MODES; ++i) rd->thresh_mult[i] = cpi->oxcf.mode == 0; - - if (sf->adaptive_rd_thresh) { - rd->thresh_mult[THR_NEARESTMV] = 300; - rd->thresh_mult[THR_NEARESTL2] = 300; - rd->thresh_mult[THR_NEARESTL3] = 300; - rd->thresh_mult[THR_NEARESTB] = 300; - rd->thresh_mult[THR_NEARESTA2] = 300; - rd->thresh_mult[THR_NEARESTA] = 300; - rd->thresh_mult[THR_NEARESTG] = 300; - } else { - rd->thresh_mult[THR_NEARESTMV] = 0; - rd->thresh_mult[THR_NEARESTL2] = 0; - rd->thresh_mult[THR_NEARESTL3] = 0; - rd->thresh_mult[THR_NEARESTB] = 0; - rd->thresh_mult[THR_NEARESTA2] = 0; - rd->thresh_mult[THR_NEARESTA] = 0; - rd->thresh_mult[THR_NEARESTG] = 0; - } - - rd->thresh_mult[THR_NEWMV] += 1000; - rd->thresh_mult[THR_NEWL2] += 1000; - rd->thresh_mult[THR_NEWL3] += 1000; - rd->thresh_mult[THR_NEWB] += 1000; - rd->thresh_mult[THR_NEWA2] = 1000; - rd->thresh_mult[THR_NEWA] += 1000; - rd->thresh_mult[THR_NEWG] += 1000; - - rd->thresh_mult[THR_NEARMV] += 1000; - rd->thresh_mult[THR_NEARL2] += 1000; - rd->thresh_mult[THR_NEARL3] += 1000; - rd->thresh_mult[THR_NEARB] += 1000; - rd->thresh_mult[THR_NEARA2] = 1000; - rd->thresh_mult[THR_NEARA] += 1000; - rd->thresh_mult[THR_NEARG] += 
1000; - - rd->thresh_mult[THR_GLOBALMV] += 2000; - rd->thresh_mult[THR_GLOBALL2] += 2000; - rd->thresh_mult[THR_GLOBALL3] += 2000; - rd->thresh_mult[THR_GLOBALB] += 2000; - rd->thresh_mult[THR_GLOBALA2] = 2000; - rd->thresh_mult[THR_GLOBALG] += 2000; - rd->thresh_mult[THR_GLOBALA] += 2000; - - rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA] += 1000; - rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A] += 1000; - rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A] += 1000; - rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA] += 1000; - rd->thresh_mult[THR_COMP_NEAREST_NEARESTLB] += 1000; - rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2B] += 1000; - rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3B] += 1000; - rd->thresh_mult[THR_COMP_NEAREST_NEARESTGB] += 1000; - rd->thresh_mult[THR_COMP_NEAREST_NEARESTLA2] += 1000; - rd->thresh_mult[THR_COMP_NEAREST_NEARESTL2A2] += 1000; - rd->thresh_mult[THR_COMP_NEAREST_NEARESTL3A2] += 1000; - rd->thresh_mult[THR_COMP_NEAREST_NEARESTGA2] += 1000; - - rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL2] += 2000; - rd->thresh_mult[THR_COMP_NEAREST_NEARESTLL3] += 2000; - rd->thresh_mult[THR_COMP_NEAREST_NEARESTLG] += 2000; - rd->thresh_mult[THR_COMP_NEAREST_NEARESTBA] += 2000; - - rd->thresh_mult[THR_COMP_NEAR_NEARLA] += 1200; - rd->thresh_mult[THR_COMP_NEAREST_NEWLA] += 1500; - rd->thresh_mult[THR_COMP_NEW_NEARESTLA] += 1500; - rd->thresh_mult[THR_COMP_NEAR_NEWLA] += 1700; - rd->thresh_mult[THR_COMP_NEW_NEARLA] += 1700; - rd->thresh_mult[THR_COMP_NEW_NEWLA] += 2000; - rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA] += 2500; - - rd->thresh_mult[THR_COMP_NEAR_NEARL2A] += 1200; - rd->thresh_mult[THR_COMP_NEAREST_NEWL2A] += 1500; - rd->thresh_mult[THR_COMP_NEW_NEARESTL2A] += 1500; - rd->thresh_mult[THR_COMP_NEAR_NEWL2A] += 1700; - rd->thresh_mult[THR_COMP_NEW_NEARL2A] += 1700; - rd->thresh_mult[THR_COMP_NEW_NEWL2A] += 2000; - rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A] += 2500; - - rd->thresh_mult[THR_COMP_NEAR_NEARL3A] += 1200; - rd->thresh_mult[THR_COMP_NEAREST_NEWL3A] += 1500; - 
rd->thresh_mult[THR_COMP_NEW_NEARESTL3A] += 1500; - rd->thresh_mult[THR_COMP_NEAR_NEWL3A] += 1700; - rd->thresh_mult[THR_COMP_NEW_NEARL3A] += 1700; - rd->thresh_mult[THR_COMP_NEW_NEWL3A] += 2000; - rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A] += 2500; - - rd->thresh_mult[THR_COMP_NEAR_NEARGA] += 1200; - rd->thresh_mult[THR_COMP_NEAREST_NEWGA] += 1500; - rd->thresh_mult[THR_COMP_NEW_NEARESTGA] += 1500; - rd->thresh_mult[THR_COMP_NEAR_NEWGA] += 1700; - rd->thresh_mult[THR_COMP_NEW_NEARGA] += 1700; - rd->thresh_mult[THR_COMP_NEW_NEWGA] += 2000; - rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA] += 2500; - - rd->thresh_mult[THR_COMP_NEAR_NEARLB] += 1200; - rd->thresh_mult[THR_COMP_NEAREST_NEWLB] += 1500; - rd->thresh_mult[THR_COMP_NEW_NEARESTLB] += 1500; - rd->thresh_mult[THR_COMP_NEAR_NEWLB] += 1700; - rd->thresh_mult[THR_COMP_NEW_NEARLB] += 1700; - rd->thresh_mult[THR_COMP_NEW_NEWLB] += 2000; - rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLB] += 2500; - - rd->thresh_mult[THR_COMP_NEAR_NEARL2B] += 1200; - rd->thresh_mult[THR_COMP_NEAREST_NEWL2B] += 1500; - rd->thresh_mult[THR_COMP_NEW_NEARESTL2B] += 1500; - rd->thresh_mult[THR_COMP_NEAR_NEWL2B] += 1700; - rd->thresh_mult[THR_COMP_NEW_NEARL2B] += 1700; - rd->thresh_mult[THR_COMP_NEW_NEWL2B] += 2000; - rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2B] += 2500; - - rd->thresh_mult[THR_COMP_NEAR_NEARL3B] += 1200; - rd->thresh_mult[THR_COMP_NEAREST_NEWL3B] += 1500; - rd->thresh_mult[THR_COMP_NEW_NEARESTL3B] += 1500; - rd->thresh_mult[THR_COMP_NEAR_NEWL3B] += 1700; - rd->thresh_mult[THR_COMP_NEW_NEARL3B] += 1700; - rd->thresh_mult[THR_COMP_NEW_NEWL3B] += 2000; - rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3B] += 2500; - - rd->thresh_mult[THR_COMP_NEAR_NEARGB] += 1200; - rd->thresh_mult[THR_COMP_NEAREST_NEWGB] += 1500; - rd->thresh_mult[THR_COMP_NEW_NEARESTGB] += 1500; - rd->thresh_mult[THR_COMP_NEAR_NEWGB] += 1700; - rd->thresh_mult[THR_COMP_NEW_NEARGB] += 1700; - rd->thresh_mult[THR_COMP_NEW_NEWGB] += 2000; - rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGB] 
+= 2500; - - rd->thresh_mult[THR_COMP_NEAR_NEARLA2] += 1200; - rd->thresh_mult[THR_COMP_NEAREST_NEWLA2] += 1500; - rd->thresh_mult[THR_COMP_NEW_NEARESTLA2] += 1500; - rd->thresh_mult[THR_COMP_NEAR_NEWLA2] += 1700; - rd->thresh_mult[THR_COMP_NEW_NEARLA2] += 1700; - rd->thresh_mult[THR_COMP_NEW_NEWLA2] += 2000; - rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLA2] += 2500; - - rd->thresh_mult[THR_COMP_NEAR_NEARL2A2] += 1200; - rd->thresh_mult[THR_COMP_NEAREST_NEWL2A2] += 1500; - rd->thresh_mult[THR_COMP_NEW_NEARESTL2A2] += 1500; - rd->thresh_mult[THR_COMP_NEAR_NEWL2A2] += 1700; - rd->thresh_mult[THR_COMP_NEW_NEARL2A2] += 1700; - rd->thresh_mult[THR_COMP_NEW_NEWL2A2] += 2000; - rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL2A2] += 2500; - - rd->thresh_mult[THR_COMP_NEAR_NEARL3A2] += 1200; - rd->thresh_mult[THR_COMP_NEAREST_NEWL3A2] += 1500; - rd->thresh_mult[THR_COMP_NEW_NEARESTL3A2] += 1500; - rd->thresh_mult[THR_COMP_NEAR_NEWL3A2] += 1700; - rd->thresh_mult[THR_COMP_NEW_NEARL3A2] += 1700; - rd->thresh_mult[THR_COMP_NEW_NEWL3A2] += 2000; - rd->thresh_mult[THR_COMP_GLOBAL_GLOBALL3A2] += 2500; - - rd->thresh_mult[THR_COMP_NEAR_NEARGA2] += 1200; - rd->thresh_mult[THR_COMP_NEAREST_NEWGA2] += 1500; - rd->thresh_mult[THR_COMP_NEW_NEARESTGA2] += 1500; - rd->thresh_mult[THR_COMP_NEAR_NEWGA2] += 1700; - rd->thresh_mult[THR_COMP_NEW_NEARGA2] += 1700; - rd->thresh_mult[THR_COMP_NEW_NEWGA2] += 2000; - rd->thresh_mult[THR_COMP_GLOBAL_GLOBALGA2] += 2500; - - rd->thresh_mult[THR_COMP_NEAR_NEARLL2] += 1600; - rd->thresh_mult[THR_COMP_NEAREST_NEWLL2] += 2000; - rd->thresh_mult[THR_COMP_NEW_NEARESTLL2] += 2000; - rd->thresh_mult[THR_COMP_NEAR_NEWLL2] += 2200; - rd->thresh_mult[THR_COMP_NEW_NEARLL2] += 2200; - rd->thresh_mult[THR_COMP_NEW_NEWLL2] += 2400; - rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL2] += 3200; - - rd->thresh_mult[THR_COMP_NEAR_NEARLL3] += 1600; - rd->thresh_mult[THR_COMP_NEAREST_NEWLL3] += 2000; - rd->thresh_mult[THR_COMP_NEW_NEARESTLL3] += 2000; - rd->thresh_mult[THR_COMP_NEAR_NEWLL3] 
+= 2200; - rd->thresh_mult[THR_COMP_NEW_NEARLL3] += 2200; - rd->thresh_mult[THR_COMP_NEW_NEWLL3] += 2400; - rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLL3] += 3200; - - rd->thresh_mult[THR_COMP_NEAR_NEARLG] += 1600; - rd->thresh_mult[THR_COMP_NEAREST_NEWLG] += 2000; - rd->thresh_mult[THR_COMP_NEW_NEARESTLG] += 2000; - rd->thresh_mult[THR_COMP_NEAR_NEWLG] += 2200; - rd->thresh_mult[THR_COMP_NEW_NEARLG] += 2200; - rd->thresh_mult[THR_COMP_NEW_NEWLG] += 2400; - rd->thresh_mult[THR_COMP_GLOBAL_GLOBALLG] += 3200; - - rd->thresh_mult[THR_COMP_NEAR_NEARBA] += 1600; - rd->thresh_mult[THR_COMP_NEAREST_NEWBA] += 2000; - rd->thresh_mult[THR_COMP_NEW_NEARESTBA] += 2000; - rd->thresh_mult[THR_COMP_NEAR_NEWBA] += 2200; - rd->thresh_mult[THR_COMP_NEW_NEARBA] += 2200; - rd->thresh_mult[THR_COMP_NEW_NEWBA] += 2400; - rd->thresh_mult[THR_COMP_GLOBAL_GLOBALBA] += 3200; - - rd->thresh_mult[THR_DC] += 1000; - rd->thresh_mult[THR_PAETH] += 1000; - rd->thresh_mult[THR_SMOOTH] += 2000; - rd->thresh_mult[THR_SMOOTH_V] += 2000; - rd->thresh_mult[THR_SMOOTH_H] += 2000; - rd->thresh_mult[THR_H_PRED] += 2000; - rd->thresh_mult[THR_V_PRED] += 2000; - rd->thresh_mult[THR_D135_PRED] += 2500; - rd->thresh_mult[THR_D203_PRED] += 2500; - rd->thresh_mult[THR_D157_PRED] += 2500; - rd->thresh_mult[THR_D67_PRED] += 2500; - rd->thresh_mult[THR_D113_PRED] += 2500; - rd->thresh_mult[THR_D45_PRED] += 2500; -} - -void av1_set_rd_speed_thresholds_sub8x8(AV1_COMP *cpi) { - static const int thresh_mult[MAX_REFS] = { 2500, 2500, 2500, 2500, 2500, - 2500, 2500, 4500, 4500, 4500, - 4500, 4500, 4500, 4500, 4500, - 4500, 4500, 4500, 4500, 2500 }; - RD_OPT *const rd = &cpi->rd; - memcpy(rd->thresh_mult_sub8x8, thresh_mult, sizeof(thresh_mult)); -} - -void av1_update_rd_thresh_fact(const AV1_COMMON *const cm, - int (*factor_buf)[MAX_MODES], int rd_thresh, - int bsize, int best_mode_index) { - if (rd_thresh > 0) { - const int top_mode = MAX_MODES; - int mode; - for (mode = 0; mode < top_mode; ++mode) { - const BLOCK_SIZE 
min_size = AOMMAX(bsize - 1, BLOCK_4X4); - const BLOCK_SIZE max_size = - AOMMIN(bsize + 2, (int)cm->seq_params.sb_size); - BLOCK_SIZE bs; - for (bs = min_size; bs <= max_size; ++bs) { - int *const fact = &factor_buf[bs][mode]; - if (mode == best_mode_index) { - *fact -= (*fact >> 4); - } else { - *fact = AOMMIN(*fact + RD_THRESH_INC, rd_thresh * RD_THRESH_MAX_FACT); - } - } - } - } -} - -int av1_get_intra_cost_penalty(int qindex, int qdelta, - aom_bit_depth_t bit_depth) { - const int q = av1_dc_quant_Q3(qindex, qdelta, bit_depth); - switch (bit_depth) { - case AOM_BITS_8: return 20 * q; - case AOM_BITS_10: return 5 * q; - case AOM_BITS_12: return ROUND_POWER_OF_TWO(5 * q, 2); - default: - assert(0 && "bit_depth should be AOM_BITS_8, AOM_BITS_10 or AOM_BITS_12"); - return -1; - } -} diff --git a/third_party/aom/av1/encoder/rd.h b/third_party/aom/av1/encoder/rd.h deleted file mode 100644 index 755b61df5..000000000 --- a/third_party/aom/av1/encoder/rd.h +++ /dev/null @@ -1,464 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_ENCODER_RD_H_ -#define AOM_AV1_ENCODER_RD_H_ - -#include - -#include "av1/common/blockd.h" - -#include "av1/encoder/block.h" -#include "av1/encoder/context_tree.h" -#include "av1/encoder/cost.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define RDDIV_BITS 7 -#define RD_EPB_SHIFT 6 - -#define RDCOST(RM, R, D) \ - (ROUND_POWER_OF_TWO(((int64_t)(R)) * (RM), AV1_PROB_COST_SHIFT) + \ - ((D) * (1 << RDDIV_BITS))) - -#define RDCOST_DBL(RM, R, D) \ - (((((double)(R)) * (RM)) / (double)(1 << AV1_PROB_COST_SHIFT)) + \ - ((double)(D) * (1 << RDDIV_BITS))) - -#define QIDX_SKIP_THRESH 115 - -#define MV_COST_WEIGHT 108 -#define MV_COST_WEIGHT_SUB 120 - -#define RD_THRESH_MAX_FACT 64 -#define RD_THRESH_INC 1 - -// Factor to weigh the rate for switchable interp filters. -#define SWITCHABLE_INTERP_RATE_FACTOR 1 - -// This enumerator type needs to be kept aligned with the mode order in -// const MODE_DEFINITION av1_mode_order[MAX_MODES] used in the rd code. -typedef enum { - THR_NEARESTMV, - THR_NEARESTL2, - THR_NEARESTL3, - THR_NEARESTB, - THR_NEARESTA2, - THR_NEARESTA, - THR_NEARESTG, - - THR_NEWMV, - THR_NEWL2, - THR_NEWL3, - THR_NEWB, - THR_NEWA2, - THR_NEWA, - THR_NEWG, - - THR_NEARMV, - THR_NEARL2, - THR_NEARL3, - THR_NEARB, - THR_NEARA2, - THR_NEARA, - THR_NEARG, - - THR_GLOBALMV, - THR_GLOBALL2, - THR_GLOBALL3, - THR_GLOBALB, - THR_GLOBALA2, - THR_GLOBALA, - THR_GLOBALG, - - THR_COMP_NEAREST_NEARESTLA, - THR_COMP_NEAREST_NEARESTL2A, - THR_COMP_NEAREST_NEARESTL3A, - THR_COMP_NEAREST_NEARESTGA, - THR_COMP_NEAREST_NEARESTLB, - THR_COMP_NEAREST_NEARESTL2B, - THR_COMP_NEAREST_NEARESTL3B, - THR_COMP_NEAREST_NEARESTGB, - THR_COMP_NEAREST_NEARESTLA2, - THR_COMP_NEAREST_NEARESTL2A2, - THR_COMP_NEAREST_NEARESTL3A2, - THR_COMP_NEAREST_NEARESTGA2, - THR_COMP_NEAREST_NEARESTLL2, - THR_COMP_NEAREST_NEARESTLL3, - THR_COMP_NEAREST_NEARESTLG, - THR_COMP_NEAREST_NEARESTBA, - - THR_COMP_NEAR_NEARLA, - THR_COMP_NEW_NEARESTLA, - THR_COMP_NEAREST_NEWLA, - 
THR_COMP_NEW_NEARLA, - THR_COMP_NEAR_NEWLA, - THR_COMP_NEW_NEWLA, - THR_COMP_GLOBAL_GLOBALLA, - - THR_COMP_NEAR_NEARL2A, - THR_COMP_NEW_NEARESTL2A, - THR_COMP_NEAREST_NEWL2A, - THR_COMP_NEW_NEARL2A, - THR_COMP_NEAR_NEWL2A, - THR_COMP_NEW_NEWL2A, - THR_COMP_GLOBAL_GLOBALL2A, - - THR_COMP_NEAR_NEARL3A, - THR_COMP_NEW_NEARESTL3A, - THR_COMP_NEAREST_NEWL3A, - THR_COMP_NEW_NEARL3A, - THR_COMP_NEAR_NEWL3A, - THR_COMP_NEW_NEWL3A, - THR_COMP_GLOBAL_GLOBALL3A, - - THR_COMP_NEAR_NEARGA, - THR_COMP_NEW_NEARESTGA, - THR_COMP_NEAREST_NEWGA, - THR_COMP_NEW_NEARGA, - THR_COMP_NEAR_NEWGA, - THR_COMP_NEW_NEWGA, - THR_COMP_GLOBAL_GLOBALGA, - - THR_COMP_NEAR_NEARLB, - THR_COMP_NEW_NEARESTLB, - THR_COMP_NEAREST_NEWLB, - THR_COMP_NEW_NEARLB, - THR_COMP_NEAR_NEWLB, - THR_COMP_NEW_NEWLB, - THR_COMP_GLOBAL_GLOBALLB, - - THR_COMP_NEAR_NEARL2B, - THR_COMP_NEW_NEARESTL2B, - THR_COMP_NEAREST_NEWL2B, - THR_COMP_NEW_NEARL2B, - THR_COMP_NEAR_NEWL2B, - THR_COMP_NEW_NEWL2B, - THR_COMP_GLOBAL_GLOBALL2B, - - THR_COMP_NEAR_NEARL3B, - THR_COMP_NEW_NEARESTL3B, - THR_COMP_NEAREST_NEWL3B, - THR_COMP_NEW_NEARL3B, - THR_COMP_NEAR_NEWL3B, - THR_COMP_NEW_NEWL3B, - THR_COMP_GLOBAL_GLOBALL3B, - - THR_COMP_NEAR_NEARGB, - THR_COMP_NEW_NEARESTGB, - THR_COMP_NEAREST_NEWGB, - THR_COMP_NEW_NEARGB, - THR_COMP_NEAR_NEWGB, - THR_COMP_NEW_NEWGB, - THR_COMP_GLOBAL_GLOBALGB, - - THR_COMP_NEAR_NEARLA2, - THR_COMP_NEW_NEARESTLA2, - THR_COMP_NEAREST_NEWLA2, - THR_COMP_NEW_NEARLA2, - THR_COMP_NEAR_NEWLA2, - THR_COMP_NEW_NEWLA2, - THR_COMP_GLOBAL_GLOBALLA2, - - THR_COMP_NEAR_NEARL2A2, - THR_COMP_NEW_NEARESTL2A2, - THR_COMP_NEAREST_NEWL2A2, - THR_COMP_NEW_NEARL2A2, - THR_COMP_NEAR_NEWL2A2, - THR_COMP_NEW_NEWL2A2, - THR_COMP_GLOBAL_GLOBALL2A2, - - THR_COMP_NEAR_NEARL3A2, - THR_COMP_NEW_NEARESTL3A2, - THR_COMP_NEAREST_NEWL3A2, - THR_COMP_NEW_NEARL3A2, - THR_COMP_NEAR_NEWL3A2, - THR_COMP_NEW_NEWL3A2, - THR_COMP_GLOBAL_GLOBALL3A2, - - THR_COMP_NEAR_NEARGA2, - THR_COMP_NEW_NEARESTGA2, - THR_COMP_NEAREST_NEWGA2, - 
THR_COMP_NEW_NEARGA2, - THR_COMP_NEAR_NEWGA2, - THR_COMP_NEW_NEWGA2, - THR_COMP_GLOBAL_GLOBALGA2, - - THR_COMP_NEAR_NEARLL2, - THR_COMP_NEW_NEARESTLL2, - THR_COMP_NEAREST_NEWLL2, - THR_COMP_NEW_NEARLL2, - THR_COMP_NEAR_NEWLL2, - THR_COMP_NEW_NEWLL2, - THR_COMP_GLOBAL_GLOBALLL2, - - THR_COMP_NEAR_NEARLL3, - THR_COMP_NEW_NEARESTLL3, - THR_COMP_NEAREST_NEWLL3, - THR_COMP_NEW_NEARLL3, - THR_COMP_NEAR_NEWLL3, - THR_COMP_NEW_NEWLL3, - THR_COMP_GLOBAL_GLOBALLL3, - - THR_COMP_NEAR_NEARLG, - THR_COMP_NEW_NEARESTLG, - THR_COMP_NEAREST_NEWLG, - THR_COMP_NEW_NEARLG, - THR_COMP_NEAR_NEWLG, - THR_COMP_NEW_NEWLG, - THR_COMP_GLOBAL_GLOBALLG, - - THR_COMP_NEAR_NEARBA, - THR_COMP_NEW_NEARESTBA, - THR_COMP_NEAREST_NEWBA, - THR_COMP_NEW_NEARBA, - THR_COMP_NEAR_NEWBA, - THR_COMP_NEW_NEWBA, - THR_COMP_GLOBAL_GLOBALBA, - - THR_DC, - THR_PAETH, - THR_SMOOTH, - THR_SMOOTH_V, - THR_SMOOTH_H, - THR_H_PRED, - THR_V_PRED, - THR_D135_PRED, - THR_D203_PRED, - THR_D157_PRED, - THR_D67_PRED, - THR_D113_PRED, - THR_D45_PRED, - - MAX_MODES, - - LAST_SINGLE_REF_MODES = THR_GLOBALG, - MAX_SINGLE_REF_MODES = LAST_SINGLE_REF_MODES + 1, - LAST_COMP_REF_MODES = THR_COMP_GLOBAL_GLOBALBA, - MAX_COMP_REF_MODES = LAST_COMP_REF_MODES + 1 -} THR_MODES; - -typedef enum { - THR_LAST, - THR_LAST2, - THR_LAST3, - THR_BWDR, - THR_ALTR2, - THR_GOLD, - THR_ALTR, - - THR_COMP_LA, - THR_COMP_L2A, - THR_COMP_L3A, - THR_COMP_GA, - - THR_COMP_LB, - THR_COMP_L2B, - THR_COMP_L3B, - THR_COMP_GB, - - THR_COMP_LA2, - THR_COMP_L2A2, - THR_COMP_L3A2, - THR_COMP_GA2, - - THR_INTRA, - - MAX_REFS -} THR_MODES_SUB8X8; - -typedef struct RD_OPT { - // Thresh_mult is used to set a threshold for the rd score. A higher value - // means that we will accept the best mode so far more often. This number - // is used in combination with the current block size, and thresh_freq_fact - // to pick a threshold. 
- int thresh_mult[MAX_MODES]; - int thresh_mult_sub8x8[MAX_REFS]; - - int threshes[MAX_SEGMENTS][BLOCK_SIZES_ALL][MAX_MODES]; - - int64_t prediction_type_threshes[REF_FRAMES][REFERENCE_MODES]; - - int RDMULT; -} RD_OPT; - -static INLINE void av1_init_rd_stats(RD_STATS *rd_stats) { -#if CONFIG_RD_DEBUG - int plane; -#endif - rd_stats->rate = 0; - rd_stats->dist = 0; - rd_stats->rdcost = 0; - rd_stats->sse = 0; - rd_stats->skip = 1; - rd_stats->zero_rate = 0; - rd_stats->invalid_rate = 0; - rd_stats->ref_rdcost = INT64_MAX; -#if CONFIG_RD_DEBUG - // This may run into problems when monochrome video is - // encoded, as there will only be 1 plane - for (plane = 0; plane < MAX_MB_PLANE; ++plane) { - rd_stats->txb_coeff_cost[plane] = 0; - { - int r, c; - for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r) - for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c) - rd_stats->txb_coeff_cost_map[plane][r][c] = 0; - } - } -#endif -} - -static INLINE void av1_invalid_rd_stats(RD_STATS *rd_stats) { -#if CONFIG_RD_DEBUG - int plane; -#endif - rd_stats->rate = INT_MAX; - rd_stats->dist = INT64_MAX; - rd_stats->rdcost = INT64_MAX; - rd_stats->sse = INT64_MAX; - rd_stats->skip = 0; - rd_stats->zero_rate = 0; - rd_stats->invalid_rate = 1; - rd_stats->ref_rdcost = INT64_MAX; -#if CONFIG_RD_DEBUG - // This may run into problems when monochrome video is - // encoded, as there will only be 1 plane - for (plane = 0; plane < MAX_MB_PLANE; ++plane) { - rd_stats->txb_coeff_cost[plane] = INT_MAX; - { - int r, c; - for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r) - for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c) - rd_stats->txb_coeff_cost_map[plane][r][c] = INT_MAX; - } - } -#endif -} - -static INLINE void av1_merge_rd_stats(RD_STATS *rd_stats_dst, - const RD_STATS *rd_stats_src) { -#if CONFIG_RD_DEBUG - int plane; -#endif - rd_stats_dst->rate += rd_stats_src->rate; - if (!rd_stats_dst->zero_rate) - rd_stats_dst->zero_rate = rd_stats_src->zero_rate; - rd_stats_dst->dist += rd_stats_src->dist; - rd_stats_dst->sse 
+= rd_stats_src->sse; - rd_stats_dst->skip &= rd_stats_src->skip; - rd_stats_dst->invalid_rate &= rd_stats_src->invalid_rate; -#if CONFIG_RD_DEBUG - // This may run into problems when monochrome video is - // encoded, as there will only be 1 plane - for (plane = 0; plane < MAX_MB_PLANE; ++plane) { - rd_stats_dst->txb_coeff_cost[plane] += rd_stats_src->txb_coeff_cost[plane]; - { - // TODO(angiebird): optimize this part - int r, c; - int ref_txb_coeff_cost = 0; - for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r) - for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c) { - rd_stats_dst->txb_coeff_cost_map[plane][r][c] += - rd_stats_src->txb_coeff_cost_map[plane][r][c]; - ref_txb_coeff_cost += rd_stats_dst->txb_coeff_cost_map[plane][r][c]; - } - assert(ref_txb_coeff_cost == rd_stats_dst->txb_coeff_cost[plane]); - } - } -#endif -} - -struct TileInfo; -struct TileDataEnc; -struct AV1_COMP; -struct macroblock; - -int av1_compute_rd_mult(const struct AV1_COMP *cpi, int qindex); - -void av1_initialize_rd_consts(struct AV1_COMP *cpi); - -void av1_initialize_me_consts(const struct AV1_COMP *cpi, MACROBLOCK *x, - int qindex); - -void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n, - unsigned int qstep, int *rate, int64_t *dist); - -void av1_model_rd_curvfit(double xqr, double *rate_f, double *distbysse_f); -void av1_model_rd_surffit(double xm, double yl, double *rate_f, - double *distbysse_f); - -int av1_get_switchable_rate(const AV1_COMMON *const cm, MACROBLOCK *x, - const MACROBLOCKD *xd); - -int av1_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block, - int stride); - -int16_t *av1_raster_block_offset_int16(BLOCK_SIZE plane_bsize, int raster_block, - int16_t *base); - -YV12_BUFFER_CONFIG *av1_get_scaled_ref_frame(const struct AV1_COMP *cpi, - int ref_frame); - -void av1_init_me_luts(void); - -void av1_set_mvcost(MACROBLOCK *x, int ref, int ref_mv_idx); - -void av1_get_entropy_contexts(BLOCK_SIZE bsize, - const struct macroblockd_plane *pd, - ENTROPY_CONTEXT 
t_above[MAX_MIB_SIZE], - ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]); - -void av1_set_rd_speed_thresholds(struct AV1_COMP *cpi); - -void av1_set_rd_speed_thresholds_sub8x8(struct AV1_COMP *cpi); - -void av1_update_rd_thresh_fact(const AV1_COMMON *const cm, - int (*fact)[MAX_MODES], int rd_thresh, int bsize, - int best_mode_index); - -static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh, - int thresh_fact) { - return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX; -} - -void av1_mv_pred(const struct AV1_COMP *cpi, MACROBLOCK *x, - uint8_t *ref_y_buffer, int ref_y_stride, int ref_frame, - BLOCK_SIZE block_size); - -static INLINE void set_error_per_bit(MACROBLOCK *x, int rdmult) { - x->errorperbit = rdmult >> RD_EPB_SHIFT; - x->errorperbit += (x->errorperbit == 0); -} - -void av1_setup_pred_block(const MACROBLOCKD *xd, - struct buf_2d dst[MAX_MB_PLANE], - const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col, - const struct scale_factors *scale, - const struct scale_factors *scale_uv, - const int num_planes); - -int av1_get_intra_cost_penalty(int qindex, int qdelta, - aom_bit_depth_t bit_depth); - -void av1_fill_mode_rates(AV1_COMMON *const cm, MACROBLOCK *x, - FRAME_CONTEXT *fc); - -void av1_fill_coeff_costs(MACROBLOCK *x, FRAME_CONTEXT *fc, - const int num_planes); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_RD_H_ diff --git a/third_party/aom/av1/encoder/rdopt.c b/third_party/aom/av1/encoder/rdopt.c deleted file mode 100644 index c2d15534f..000000000 --- a/third_party/aom/av1/encoder/rdopt.c +++ /dev/null @@ -1,12199 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include -#include - -#include "config/aom_dsp_rtcd.h" -#include "config/av1_rtcd.h" - -#include "aom_dsp/aom_dsp_common.h" -#include "aom_dsp/blend.h" -#include "aom_mem/aom_mem.h" -#include "aom_ports/aom_timer.h" -#include "aom_ports/mem.h" -#include "aom_ports/system_state.h" - -#include "av1/common/cfl.h" -#include "av1/common/common.h" -#include "av1/common/common_data.h" -#include "av1/common/entropy.h" -#include "av1/common/entropymode.h" -#include "av1/common/idct.h" -#include "av1/common/mvref_common.h" -#include "av1/common/obmc.h" -#include "av1/common/pred_common.h" -#include "av1/common/quant_common.h" -#include "av1/common/reconinter.h" -#include "av1/common/reconintra.h" -#include "av1/common/scan.h" -#include "av1/common/seg_common.h" -#include "av1/common/txb_common.h" -#include "av1/common/warped_motion.h" - -#include "av1/encoder/aq_variance.h" -#include "av1/encoder/av1_quantize.h" -#include "av1/encoder/cost.h" -#include "av1/encoder/encodemb.h" -#include "av1/encoder/encodemv.h" -#include "av1/encoder/encoder.h" -#include "av1/encoder/encodetxb.h" -#include "av1/encoder/hybrid_fwd_txfm.h" -#include "av1/encoder/mcomp.h" -#include "av1/encoder/ml.h" -#include "av1/encoder/palette.h" -#include "av1/encoder/pustats.h" -#include "av1/encoder/random.h" -#include "av1/encoder/ratectrl.h" -#include "av1/encoder/rd.h" -#include "av1/encoder/rdopt.h" -#include "av1/encoder/reconinter_enc.h" -#include "av1/encoder/tokenize.h" -#include "av1/encoder/tx_prune_model_weights.h" - -typedef void (*model_rd_for_sb_type)( - const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd, - int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum, - int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb, - int *plane_rate, int64_t *plane_sse, 
int64_t *plane_dist); -typedef void (*model_rd_from_sse_type)(const AV1_COMP *const cpi, - const MACROBLOCK *const x, - BLOCK_SIZE plane_bsize, int plane, - int64_t sse, int num_samples, int *rate, - int64_t *dist); - -static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize, - MACROBLOCK *x, MACROBLOCKD *xd, int plane_from, - int plane_to, int mi_row, int mi_col, - int *out_rate_sum, int64_t *out_dist_sum, - int *skip_txfm_sb, int64_t *skip_sse_sb, - int *plane_rate, int64_t *plane_sse, - int64_t *plane_dist); -static void model_rd_for_sb_with_curvfit( - const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd, - int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum, - int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb, - int *plane_rate, int64_t *plane_sse, int64_t *plane_dist); -static void model_rd_for_sb_with_surffit( - const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd, - int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum, - int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb, - int *plane_rate, int64_t *plane_sse, int64_t *plane_dist); -static void model_rd_for_sb_with_dnn( - const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd, - int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum, - int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb, - int *plane_rate, int64_t *plane_sse, int64_t *plane_dist); -static void model_rd_for_sb_with_fullrdy( - const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd, - int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum, - int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb, - int *plane_rate, int64_t *plane_sse, int64_t *plane_dist); -static void model_rd_from_sse(const AV1_COMP *const cpi, - const MACROBLOCK *const x, BLOCK_SIZE plane_bsize, - int plane, int64_t sse, int num_samples, - int *rate, int64_t 
*dist); -static void model_rd_with_dnn(const AV1_COMP *const cpi, - const MACROBLOCK *const x, BLOCK_SIZE plane_bsize, - int plane, int64_t sse, int num_samples, - int *rate, int64_t *dist); -static void model_rd_with_curvfit(const AV1_COMP *const cpi, - const MACROBLOCK *const x, - BLOCK_SIZE plane_bsize, int plane, - int64_t sse, int num_samples, int *rate, - int64_t *dist); -static void model_rd_with_surffit(const AV1_COMP *const cpi, - const MACROBLOCK *const x, - BLOCK_SIZE plane_bsize, int plane, - int64_t sse, int num_samples, int *rate, - int64_t *dist); - -typedef enum { - MODELRD_LEGACY, - MODELRD_CURVFIT, - MODELRD_SUFFIT, - MODELRD_DNN, - MODELRD_FULLRDY, - MODELRD_TYPES -} ModelRdType; - -static model_rd_for_sb_type model_rd_sb_fn[MODELRD_TYPES] = { - model_rd_for_sb, model_rd_for_sb_with_curvfit, model_rd_for_sb_with_surffit, - model_rd_for_sb_with_dnn, model_rd_for_sb_with_fullrdy -}; - -static model_rd_from_sse_type model_rd_sse_fn[MODELRD_TYPES] = { - model_rd_from_sse, model_rd_with_curvfit, model_rd_with_surffit, - model_rd_with_dnn, NULL -}; - -// 0: Legacy model -// 1: Curve fit model -// 2: Surface fit model -// 3: DNN regression model -// 4: Full rd model -#define MODELRD_TYPE_INTERP_FILTER 1 -#define MODELRD_TYPE_TX_SEARCH_PRUNE 2 -#define MODELRD_TYPE_MASKED_COMPOUND 1 -#define MODELRD_TYPE_INTERINTRA 1 -#define MODELRD_TYPE_INTRA 1 -#define MODELRD_TYPE_JNT_COMPOUND 1 - -#define DUAL_FILTER_SET_SIZE (SWITCHABLE_FILTERS * SWITCHABLE_FILTERS) -static const InterpFilters filter_sets[DUAL_FILTER_SET_SIZE] = { - 0x00000000, 0x00010000, 0x00020000, // y = 0 - 0x00000001, 0x00010001, 0x00020001, // y = 1 - 0x00000002, 0x00010002, 0x00020002, // y = 2 -}; - -#define SECOND_REF_FRAME_MASK \ - ((1 << ALTREF_FRAME) | (1 << ALTREF2_FRAME) | (1 << BWDREF_FRAME) | \ - (1 << GOLDEN_FRAME) | (1 << LAST2_FRAME) | 0x01) - -#define ANGLE_SKIP_THRESH 10 - -static const double ADST_FLIP_SVM[8] = { - /* vertical */ - -6.6623, -2.8062, -3.2531, 3.1671, - /* 
horizontal */ - -7.7051, -3.2234, -3.6193, 3.4533 -}; - -typedef struct { - PREDICTION_MODE mode; - MV_REFERENCE_FRAME ref_frame[2]; -} MODE_DEFINITION; - -typedef struct { - MV_REFERENCE_FRAME ref_frame[2]; -} REF_DEFINITION; - -typedef enum { - FTXS_NONE = 0, - FTXS_DCT_AND_1D_DCT_ONLY = 1 << 0, - FTXS_DISABLE_TRELLIS_OPT = 1 << 1, - FTXS_USE_TRANSFORM_DOMAIN = 1 << 2 -} FAST_TX_SEARCH_MODE; - -static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x, - RD_STATS *rd_stats, BLOCK_SIZE bsize, int mi_row, - int mi_col, int64_t ref_best_rd); - -static int inter_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x, - RD_STATS *rd_stats, BLOCK_SIZE bsize, - int64_t non_skip_ref_best_rd, - int64_t skip_ref_best_rd, - FAST_TX_SEARCH_MODE ftxs_mode); - -struct rdcost_block_args { - const AV1_COMP *cpi; - MACROBLOCK *x; - ENTROPY_CONTEXT t_above[MAX_MIB_SIZE]; - ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]; - RD_STATS rd_stats; - int64_t this_rd; - int64_t best_rd; - int exit_early; - int incomplete_exit; - int use_fast_coef_costing; - FAST_TX_SEARCH_MODE ftxs_mode; -}; - -#define LAST_NEW_MV_INDEX 6 -static const MODE_DEFINITION av1_mode_order[MAX_MODES] = { - { NEARESTMV, { LAST_FRAME, NONE_FRAME } }, - { NEARESTMV, { LAST2_FRAME, NONE_FRAME } }, - { NEARESTMV, { LAST3_FRAME, NONE_FRAME } }, - { NEARESTMV, { BWDREF_FRAME, NONE_FRAME } }, - { NEARESTMV, { ALTREF2_FRAME, NONE_FRAME } }, - { NEARESTMV, { ALTREF_FRAME, NONE_FRAME } }, - { NEARESTMV, { GOLDEN_FRAME, NONE_FRAME } }, - - { NEWMV, { LAST_FRAME, NONE_FRAME } }, - { NEWMV, { LAST2_FRAME, NONE_FRAME } }, - { NEWMV, { LAST3_FRAME, NONE_FRAME } }, - { NEWMV, { BWDREF_FRAME, NONE_FRAME } }, - { NEWMV, { ALTREF2_FRAME, NONE_FRAME } }, - { NEWMV, { ALTREF_FRAME, NONE_FRAME } }, - { NEWMV, { GOLDEN_FRAME, NONE_FRAME } }, - - { NEARMV, { LAST_FRAME, NONE_FRAME } }, - { NEARMV, { LAST2_FRAME, NONE_FRAME } }, - { NEARMV, { LAST3_FRAME, NONE_FRAME } }, - { NEARMV, { BWDREF_FRAME, NONE_FRAME } }, - { NEARMV, { ALTREF2_FRAME, 
NONE_FRAME } }, - { NEARMV, { ALTREF_FRAME, NONE_FRAME } }, - { NEARMV, { GOLDEN_FRAME, NONE_FRAME } }, - - { GLOBALMV, { LAST_FRAME, NONE_FRAME } }, - { GLOBALMV, { LAST2_FRAME, NONE_FRAME } }, - { GLOBALMV, { LAST3_FRAME, NONE_FRAME } }, - { GLOBALMV, { BWDREF_FRAME, NONE_FRAME } }, - { GLOBALMV, { ALTREF2_FRAME, NONE_FRAME } }, - { GLOBALMV, { GOLDEN_FRAME, NONE_FRAME } }, - { GLOBALMV, { ALTREF_FRAME, NONE_FRAME } }, - - // TODO(zoeliu): May need to reconsider the order on the modes to check - - { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } }, - { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } }, - { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } }, - { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } }, - { NEAREST_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } }, - { NEAREST_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } }, - { NEAREST_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } }, - { NEAREST_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } }, - { NEAREST_NEARESTMV, { LAST_FRAME, ALTREF2_FRAME } }, - { NEAREST_NEARESTMV, { LAST2_FRAME, ALTREF2_FRAME } }, - { NEAREST_NEARESTMV, { LAST3_FRAME, ALTREF2_FRAME } }, - { NEAREST_NEARESTMV, { GOLDEN_FRAME, ALTREF2_FRAME } }, - - { NEAREST_NEARESTMV, { LAST_FRAME, LAST2_FRAME } }, - { NEAREST_NEARESTMV, { LAST_FRAME, LAST3_FRAME } }, - { NEAREST_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } }, - { NEAREST_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } }, - - { NEAR_NEARMV, { LAST_FRAME, ALTREF_FRAME } }, - { NEW_NEARESTMV, { LAST_FRAME, ALTREF_FRAME } }, - { NEAREST_NEWMV, { LAST_FRAME, ALTREF_FRAME } }, - { NEW_NEARMV, { LAST_FRAME, ALTREF_FRAME } }, - { NEAR_NEWMV, { LAST_FRAME, ALTREF_FRAME } }, - { NEW_NEWMV, { LAST_FRAME, ALTREF_FRAME } }, - { GLOBAL_GLOBALMV, { LAST_FRAME, ALTREF_FRAME } }, - - { NEAR_NEARMV, { LAST2_FRAME, ALTREF_FRAME } }, - { NEW_NEARESTMV, { LAST2_FRAME, ALTREF_FRAME } }, - { NEAREST_NEWMV, { LAST2_FRAME, ALTREF_FRAME } }, - { NEW_NEARMV, { LAST2_FRAME, ALTREF_FRAME } }, - { NEAR_NEWMV, { LAST2_FRAME, ALTREF_FRAME 
} }, - { NEW_NEWMV, { LAST2_FRAME, ALTREF_FRAME } }, - { GLOBAL_GLOBALMV, { LAST2_FRAME, ALTREF_FRAME } }, - - { NEAR_NEARMV, { LAST3_FRAME, ALTREF_FRAME } }, - { NEW_NEARESTMV, { LAST3_FRAME, ALTREF_FRAME } }, - { NEAREST_NEWMV, { LAST3_FRAME, ALTREF_FRAME } }, - { NEW_NEARMV, { LAST3_FRAME, ALTREF_FRAME } }, - { NEAR_NEWMV, { LAST3_FRAME, ALTREF_FRAME } }, - { NEW_NEWMV, { LAST3_FRAME, ALTREF_FRAME } }, - { GLOBAL_GLOBALMV, { LAST3_FRAME, ALTREF_FRAME } }, - - { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } }, - { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } }, - { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } }, - { NEW_NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } }, - { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } }, - { NEW_NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } }, - { GLOBAL_GLOBALMV, { GOLDEN_FRAME, ALTREF_FRAME } }, - - { NEAR_NEARMV, { LAST_FRAME, BWDREF_FRAME } }, - { NEW_NEARESTMV, { LAST_FRAME, BWDREF_FRAME } }, - { NEAREST_NEWMV, { LAST_FRAME, BWDREF_FRAME } }, - { NEW_NEARMV, { LAST_FRAME, BWDREF_FRAME } }, - { NEAR_NEWMV, { LAST_FRAME, BWDREF_FRAME } }, - { NEW_NEWMV, { LAST_FRAME, BWDREF_FRAME } }, - { GLOBAL_GLOBALMV, { LAST_FRAME, BWDREF_FRAME } }, - - { NEAR_NEARMV, { LAST2_FRAME, BWDREF_FRAME } }, - { NEW_NEARESTMV, { LAST2_FRAME, BWDREF_FRAME } }, - { NEAREST_NEWMV, { LAST2_FRAME, BWDREF_FRAME } }, - { NEW_NEARMV, { LAST2_FRAME, BWDREF_FRAME } }, - { NEAR_NEWMV, { LAST2_FRAME, BWDREF_FRAME } }, - { NEW_NEWMV, { LAST2_FRAME, BWDREF_FRAME } }, - { GLOBAL_GLOBALMV, { LAST2_FRAME, BWDREF_FRAME } }, - - { NEAR_NEARMV, { LAST3_FRAME, BWDREF_FRAME } }, - { NEW_NEARESTMV, { LAST3_FRAME, BWDREF_FRAME } }, - { NEAREST_NEWMV, { LAST3_FRAME, BWDREF_FRAME } }, - { NEW_NEARMV, { LAST3_FRAME, BWDREF_FRAME } }, - { NEAR_NEWMV, { LAST3_FRAME, BWDREF_FRAME } }, - { NEW_NEWMV, { LAST3_FRAME, BWDREF_FRAME } }, - { GLOBAL_GLOBALMV, { LAST3_FRAME, BWDREF_FRAME } }, - - { NEAR_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } }, - { NEW_NEARESTMV, { GOLDEN_FRAME, BWDREF_FRAME } }, - { 
NEAREST_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } }, - { NEW_NEARMV, { GOLDEN_FRAME, BWDREF_FRAME } }, - { NEAR_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } }, - { NEW_NEWMV, { GOLDEN_FRAME, BWDREF_FRAME } }, - { GLOBAL_GLOBALMV, { GOLDEN_FRAME, BWDREF_FRAME } }, - - { NEAR_NEARMV, { LAST_FRAME, ALTREF2_FRAME } }, - { NEW_NEARESTMV, { LAST_FRAME, ALTREF2_FRAME } }, - { NEAREST_NEWMV, { LAST_FRAME, ALTREF2_FRAME } }, - { NEW_NEARMV, { LAST_FRAME, ALTREF2_FRAME } }, - { NEAR_NEWMV, { LAST_FRAME, ALTREF2_FRAME } }, - { NEW_NEWMV, { LAST_FRAME, ALTREF2_FRAME } }, - { GLOBAL_GLOBALMV, { LAST_FRAME, ALTREF2_FRAME } }, - - { NEAR_NEARMV, { LAST2_FRAME, ALTREF2_FRAME } }, - { NEW_NEARESTMV, { LAST2_FRAME, ALTREF2_FRAME } }, - { NEAREST_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } }, - { NEW_NEARMV, { LAST2_FRAME, ALTREF2_FRAME } }, - { NEAR_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } }, - { NEW_NEWMV, { LAST2_FRAME, ALTREF2_FRAME } }, - { GLOBAL_GLOBALMV, { LAST2_FRAME, ALTREF2_FRAME } }, - - { NEAR_NEARMV, { LAST3_FRAME, ALTREF2_FRAME } }, - { NEW_NEARESTMV, { LAST3_FRAME, ALTREF2_FRAME } }, - { NEAREST_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } }, - { NEW_NEARMV, { LAST3_FRAME, ALTREF2_FRAME } }, - { NEAR_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } }, - { NEW_NEWMV, { LAST3_FRAME, ALTREF2_FRAME } }, - { GLOBAL_GLOBALMV, { LAST3_FRAME, ALTREF2_FRAME } }, - - { NEAR_NEARMV, { GOLDEN_FRAME, ALTREF2_FRAME } }, - { NEW_NEARESTMV, { GOLDEN_FRAME, ALTREF2_FRAME } }, - { NEAREST_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } }, - { NEW_NEARMV, { GOLDEN_FRAME, ALTREF2_FRAME } }, - { NEAR_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } }, - { NEW_NEWMV, { GOLDEN_FRAME, ALTREF2_FRAME } }, - { GLOBAL_GLOBALMV, { GOLDEN_FRAME, ALTREF2_FRAME } }, - - { NEAR_NEARMV, { LAST_FRAME, LAST2_FRAME } }, - { NEW_NEARESTMV, { LAST_FRAME, LAST2_FRAME } }, - { NEAREST_NEWMV, { LAST_FRAME, LAST2_FRAME } }, - { NEW_NEARMV, { LAST_FRAME, LAST2_FRAME } }, - { NEAR_NEWMV, { LAST_FRAME, LAST2_FRAME } }, - { NEW_NEWMV, { LAST_FRAME, LAST2_FRAME } }, - { 
GLOBAL_GLOBALMV, { LAST_FRAME, LAST2_FRAME } }, - - { NEAR_NEARMV, { LAST_FRAME, LAST3_FRAME } }, - { NEW_NEARESTMV, { LAST_FRAME, LAST3_FRAME } }, - { NEAREST_NEWMV, { LAST_FRAME, LAST3_FRAME } }, - { NEW_NEARMV, { LAST_FRAME, LAST3_FRAME } }, - { NEAR_NEWMV, { LAST_FRAME, LAST3_FRAME } }, - { NEW_NEWMV, { LAST_FRAME, LAST3_FRAME } }, - { GLOBAL_GLOBALMV, { LAST_FRAME, LAST3_FRAME } }, - - { NEAR_NEARMV, { LAST_FRAME, GOLDEN_FRAME } }, - { NEW_NEARESTMV, { LAST_FRAME, GOLDEN_FRAME } }, - { NEAREST_NEWMV, { LAST_FRAME, GOLDEN_FRAME } }, - { NEW_NEARMV, { LAST_FRAME, GOLDEN_FRAME } }, - { NEAR_NEWMV, { LAST_FRAME, GOLDEN_FRAME } }, - { NEW_NEWMV, { LAST_FRAME, GOLDEN_FRAME } }, - { GLOBAL_GLOBALMV, { LAST_FRAME, GOLDEN_FRAME } }, - - { NEAR_NEARMV, { BWDREF_FRAME, ALTREF_FRAME } }, - { NEW_NEARESTMV, { BWDREF_FRAME, ALTREF_FRAME } }, - { NEAREST_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } }, - { NEW_NEARMV, { BWDREF_FRAME, ALTREF_FRAME } }, - { NEAR_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } }, - { NEW_NEWMV, { BWDREF_FRAME, ALTREF_FRAME } }, - { GLOBAL_GLOBALMV, { BWDREF_FRAME, ALTREF_FRAME } }, - - // intra modes - { DC_PRED, { INTRA_FRAME, NONE_FRAME } }, - { PAETH_PRED, { INTRA_FRAME, NONE_FRAME } }, - { SMOOTH_PRED, { INTRA_FRAME, NONE_FRAME } }, - { SMOOTH_V_PRED, { INTRA_FRAME, NONE_FRAME } }, - { SMOOTH_H_PRED, { INTRA_FRAME, NONE_FRAME } }, - { H_PRED, { INTRA_FRAME, NONE_FRAME } }, - { V_PRED, { INTRA_FRAME, NONE_FRAME } }, - { D135_PRED, { INTRA_FRAME, NONE_FRAME } }, - { D203_PRED, { INTRA_FRAME, NONE_FRAME } }, - { D157_PRED, { INTRA_FRAME, NONE_FRAME } }, - { D67_PRED, { INTRA_FRAME, NONE_FRAME } }, - { D113_PRED, { INTRA_FRAME, NONE_FRAME } }, - { D45_PRED, { INTRA_FRAME, NONE_FRAME } }, -}; - -static const int16_t intra_to_mode_idx[INTRA_MODE_NUM] = { - 7, // DC_PRED, - 134, // V_PRED, - 133, // H_PRED, - 140, // D45_PRED, - 135, // D135_PRED, - 139, // D113_PRED, - 137, // D157_PRED, - 136, // D203_PRED, - 138, // D67_PRED, - 46, // SMOOTH_PRED, - 47, // 
SMOOTH_V_PRED, - 48, // SMOOTH_H_PRED, - 45, // PAETH_PRED, -}; - -/* clang-format off */ -static const int16_t single_inter_to_mode_idx[SINGLE_INTER_MODE_NUM] - [REF_FRAMES] = { - // NEARESTMV, - { -1, 0, 1, 2, 6, 3, 4, 5, }, - // NEARMV, - { -1, 15, 16, 17, 21, 18, 19, 20, }, - // GLOBALMV, - { -1, 22, 23, 24, 27, 25, 26, 28, }, - // NEWMV, - { -1, 8, 9, 10, 14, 11, 12, 13, }, -}; -/* clang-format on */ - -/* clang-format off */ -static const int16_t comp_inter_to_mode_idx[COMP_INTER_MODE_NUM][REF_FRAMES] - [REF_FRAMES] = { - // NEAREST_NEARESTMV, - { - { -1, -1, -1, -1, -1, -1, -1, -1, }, - { -1, -1, 41, 42, 43, 33, 37, 29, }, - { -1, -1, -1, -1, -1, 34, 38, 30, }, - { -1, -1, -1, -1, -1, 35, 39, 31, }, - { -1, -1, -1, -1, -1, 36, 40, 32, }, - { -1, -1, -1, -1, -1, -1, -1, 44, }, - { -1, -1, -1, -1, -1, -1, -1, -1, }, - { -1, -1, -1, -1, -1, -1, -1, -1, }, - }, - // NEAR_NEARMV, - { - { -1, -1, -1, -1, -1, -1, -1, -1, }, - { -1, -1, 141, 148, 155, 77, 105, 49, }, - { -1, -1, -1, -1, -1, 84, 112, 56, }, - { -1, -1, -1, -1, -1, 91, 119, 63, }, - { -1, -1, -1, -1, -1, 98, 126, 70, }, - { -1, -1, -1, -1, -1, -1, -1, 162, }, - { -1, -1, -1, -1, -1, -1, -1, -1, }, - { -1, -1, -1, -1, -1, -1, -1, -1, }, - }, - // NEAREST_NEWMV, - { - { -1, -1, -1, -1, -1, -1, -1, -1, }, - { -1, -1, 143, 150, 157, 79, 107, 51, }, - { -1, -1, -1, -1, -1, 86, 114, 58, }, - { -1, -1, -1, -1, -1, 93, 121, 65, }, - { -1, -1, -1, -1, -1, 100, 128, 72, }, - { -1, -1, -1, -1, -1, -1, -1, 164, }, - { -1, -1, -1, -1, -1, -1, -1, -1, }, - { -1, -1, -1, -1, -1, -1, -1, -1, }, - }, - // NEW_NEARESTMV, - { - { -1, -1, -1, -1, -1, -1, -1, -1, }, - { -1, -1, 142, 149, 156, 78, 106, 50, }, - { -1, -1, -1, -1, -1, 85, 113, 57, }, - { -1, -1, -1, -1, -1, 92, 120, 64, }, - { -1, -1, -1, -1, -1, 99, 127, 71, }, - { -1, -1, -1, -1, -1, -1, -1, 163, }, - { -1, -1, -1, -1, -1, -1, -1, -1, }, - { -1, -1, -1, -1, -1, -1, -1, -1, }, - }, - // NEAR_NEWMV, - { - { -1, -1, -1, -1, -1, -1, -1, -1, }, - { -1, -1, 145, 
152, 159, 81, 109, 53, }, - { -1, -1, -1, -1, -1, 88, 116, 60, }, - { -1, -1, -1, -1, -1, 95, 123, 67, }, - { -1, -1, -1, -1, -1, 102, 130, 74, }, - { -1, -1, -1, -1, -1, -1, -1, 166, }, - { -1, -1, -1, -1, -1, -1, -1, -1, }, - { -1, -1, -1, -1, -1, -1, -1, -1, }, - }, - // NEW_NEARMV, - { - { -1, -1, -1, -1, -1, -1, -1, -1, }, - { -1, -1, 144, 151, 158, 80, 108, 52, }, - { -1, -1, -1, -1, -1, 87, 115, 59, }, - { -1, -1, -1, -1, -1, 94, 122, 66, }, - { -1, -1, -1, -1, -1, 101, 129, 73, }, - { -1, -1, -1, -1, -1, -1, -1, 165, }, - { -1, -1, -1, -1, -1, -1, -1, -1, }, - { -1, -1, -1, -1, -1, -1, -1, -1, }, - }, - // GLOBAL_GLOBALMV, - { - { -1, -1, -1, -1, -1, -1, -1, -1, }, - { -1, -1, 147, 154, 161, 83, 111, 55, }, - { -1, -1, -1, -1, -1, 90, 118, 62, }, - { -1, -1, -1, -1, -1, 97, 125, 69, }, - { -1, -1, -1, -1, -1, 104, 132, 76, }, - { -1, -1, -1, -1, -1, -1, -1, 168, }, - { -1, -1, -1, -1, -1, -1, -1, -1, }, - { -1, -1, -1, -1, -1, -1, -1, -1, }, - }, - // NEW_NEWMV, - { - { -1, -1, -1, -1, -1, -1, -1, -1, }, - { -1, -1, 146, 153, 160, 82, 110, 54, }, - { -1, -1, -1, -1, -1, 89, 117, 61, }, - { -1, -1, -1, -1, -1, 96, 124, 68, }, - { -1, -1, -1, -1, -1, 103, 131, 75, }, - { -1, -1, -1, -1, -1, -1, -1, 167, }, - { -1, -1, -1, -1, -1, -1, -1, -1, }, - { -1, -1, -1, -1, -1, -1, -1, -1, }, - }, -}; -/* clang-format on */ - -static int get_prediction_mode_idx(PREDICTION_MODE this_mode, - MV_REFERENCE_FRAME ref_frame, - MV_REFERENCE_FRAME second_ref_frame) { - if (this_mode < INTRA_MODE_END) { - assert(ref_frame == INTRA_FRAME); - assert(second_ref_frame == NONE_FRAME); - return intra_to_mode_idx[this_mode - INTRA_MODE_START]; - } - if (this_mode >= SINGLE_INTER_MODE_START && - this_mode < SINGLE_INTER_MODE_END) { - assert((ref_frame > INTRA_FRAME) && (ref_frame <= ALTREF_FRAME)); - return single_inter_to_mode_idx[this_mode - SINGLE_INTER_MODE_START] - [ref_frame]; - } - if (this_mode >= COMP_INTER_MODE_START && this_mode < COMP_INTER_MODE_END) { - assert((ref_frame > 
INTRA_FRAME) && (ref_frame <= ALTREF_FRAME)); - assert((second_ref_frame > INTRA_FRAME) && - (second_ref_frame <= ALTREF_FRAME)); - return comp_inter_to_mode_idx[this_mode - COMP_INTER_MODE_START][ref_frame] - [second_ref_frame]; - } - assert(0); - return -1; -} - -static const PREDICTION_MODE intra_rd_search_mode_order[INTRA_MODES] = { - DC_PRED, H_PRED, V_PRED, SMOOTH_PRED, PAETH_PRED, - SMOOTH_V_PRED, SMOOTH_H_PRED, D135_PRED, D203_PRED, D157_PRED, - D67_PRED, D113_PRED, D45_PRED, -}; - -static const UV_PREDICTION_MODE uv_rd_search_mode_order[UV_INTRA_MODES] = { - UV_DC_PRED, UV_CFL_PRED, UV_H_PRED, UV_V_PRED, - UV_SMOOTH_PRED, UV_PAETH_PRED, UV_SMOOTH_V_PRED, UV_SMOOTH_H_PRED, - UV_D135_PRED, UV_D203_PRED, UV_D157_PRED, UV_D67_PRED, - UV_D113_PRED, UV_D45_PRED, -}; - -typedef struct SingleInterModeState { - int64_t rd; - MV_REFERENCE_FRAME ref_frame; - int valid; -} SingleInterModeState; - -typedef struct InterModeSearchState { - int64_t best_rd; - MB_MODE_INFO best_mbmode; - int best_rate_y; - int best_rate_uv; - int best_mode_skippable; - int best_skip2; - int best_mode_index; - int skip_intra_modes; - int num_available_refs; - int64_t dist_refs[REF_FRAMES]; - int dist_order_refs[REF_FRAMES]; - int64_t mode_threshold[MAX_MODES]; - PREDICTION_MODE best_intra_mode; - int64_t best_intra_rd; - int angle_stats_ready; - uint8_t directional_mode_skip_mask[INTRA_MODES]; - unsigned int best_pred_sse; - int rate_uv_intra[TX_SIZES_ALL]; - int rate_uv_tokenonly[TX_SIZES_ALL]; - int64_t dist_uvs[TX_SIZES_ALL]; - int skip_uvs[TX_SIZES_ALL]; - UV_PREDICTION_MODE mode_uv[TX_SIZES_ALL]; - PALETTE_MODE_INFO pmi_uv[TX_SIZES_ALL]; - int8_t uv_angle_delta[TX_SIZES_ALL]; - int64_t best_pred_rd[REFERENCE_MODES]; - int64_t best_pred_diff[REFERENCE_MODES]; - // Save a set of single_newmv for each checked ref_mv. 
- int_mv single_newmv[MAX_REF_MV_SERCH][REF_FRAMES]; - int single_newmv_rate[MAX_REF_MV_SERCH][REF_FRAMES]; - int single_newmv_valid[MAX_REF_MV_SERCH][REF_FRAMES]; - int64_t modelled_rd[MB_MODE_COUNT][MAX_REF_MV_SERCH][REF_FRAMES]; - // The rd of simple translation in single inter modes - int64_t simple_rd[MB_MODE_COUNT][MAX_REF_MV_SERCH][REF_FRAMES]; - - // Single search results by [directions][modes][reference frames] - SingleInterModeState single_state[2][SINGLE_INTER_MODE_NUM][FWD_REFS]; - int single_state_cnt[2][SINGLE_INTER_MODE_NUM]; - SingleInterModeState single_state_modelled[2][SINGLE_INTER_MODE_NUM] - [FWD_REFS]; - int single_state_modelled_cnt[2][SINGLE_INTER_MODE_NUM]; - - MV_REFERENCE_FRAME single_rd_order[2][SINGLE_INTER_MODE_NUM][FWD_REFS]; -} InterModeSearchState; - -#if CONFIG_COLLECT_INTER_MODE_RD_STATS -int inter_mode_data_block_idx(BLOCK_SIZE bsize) { - if (bsize == BLOCK_8X8) return 1; - if (bsize == BLOCK_16X16) return 2; - if (bsize == BLOCK_32X32) return 3; - return -1; -} - -void av1_inter_mode_data_init(TileDataEnc *tile_data) { - for (int i = 0; i < BLOCK_SIZES_ALL; ++i) { - InterModeRdModel *md = &tile_data->inter_mode_rd_models[i]; - md->ready = 0; - md->num = 0; - md->dist_sum = 0; - md->ld_sum = 0; - md->sse_sum = 0; - md->sse_sse_sum = 0; - md->sse_ld_sum = 0; - } -} - -static int get_est_rate_dist(TileDataEnc *tile_data, BLOCK_SIZE bsize, - int64_t sse, int *est_residue_cost, - int64_t *est_dist) { - aom_clear_system_state(); - const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize]; - if (md->ready) { - const double est_ld = md->a * sse + md->b; - if (sse < md->dist_mean) { - *est_residue_cost = 0; - *est_dist = sse; - } else { - *est_residue_cost = (int)round((sse - md->dist_mean) / est_ld); - *est_dist = (int64_t)round(md->dist_mean); - } - return 1; - } - return 0; -} - -static int64_t get_est_rd(TileDataEnc *tile_data, BLOCK_SIZE bsize, int rdmult, - int64_t sse, int curr_cost) { - int est_residue_cost; - int64_t 
est_dist; - if (get_est_rate_dist(tile_data, bsize, sse, &est_residue_cost, &est_dist)) { - int rate = est_residue_cost + curr_cost; - int64_t est_rd = RDCOST(rdmult, rate, est_dist); - return est_rd; - } - return 0; -} - -void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult) { - aom_clear_system_state(); - for (int bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) { - const int block_idx = inter_mode_data_block_idx(bsize); - InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize]; - if (block_idx == -1) continue; - if ((md->ready == 0 && md->num < 200) || (md->ready == 1 && md->num < 64)) { - continue; - } else { - if (md->ready == 0) { - md->dist_mean = md->dist_sum / md->num; - md->ld_mean = md->ld_sum / md->num; - md->sse_mean = md->sse_sum / md->num; - md->sse_sse_mean = md->sse_sse_sum / md->num; - md->sse_ld_mean = md->sse_ld_sum / md->num; - } else { - const double factor = 3; - md->dist_mean = - (md->dist_mean * factor + (md->dist_sum / md->num)) / (factor + 1); - md->ld_mean = - (md->ld_mean * factor + (md->ld_sum / md->num)) / (factor + 1); - md->sse_mean = - (md->sse_mean * factor + (md->sse_sum / md->num)) / (factor + 1); - md->sse_sse_mean = - (md->sse_sse_mean * factor + (md->sse_sse_sum / md->num)) / - (factor + 1); - md->sse_ld_mean = - (md->sse_ld_mean * factor + (md->sse_ld_sum / md->num)) / - (factor + 1); - } - - const double my = md->ld_mean; - const double mx = md->sse_mean; - const double dx = sqrt(md->sse_sse_mean); - const double dxy = md->sse_ld_mean; - - md->a = (dxy - mx * my) / (dx * dx - mx * mx); - md->b = my - md->a * mx; - md->ready = 1; - - md->num = 0; - md->dist_sum = 0; - md->ld_sum = 0; - md->sse_sum = 0; - md->sse_sse_sum = 0; - md->sse_ld_sum = 0; - } - (void)rdmult; - } -} - -static void inter_mode_data_push(TileDataEnc *tile_data, BLOCK_SIZE bsize, - int64_t sse, int64_t dist, int residue_cost) { - if (residue_cost == 0 || sse == dist) return; - const int block_idx = inter_mode_data_block_idx(bsize); - if 
(block_idx == -1) return; - InterModeRdModel *rd_model = &tile_data->inter_mode_rd_models[bsize]; - if (rd_model->num < INTER_MODE_RD_DATA_OVERALL_SIZE) { - aom_clear_system_state(); - const double ld = (sse - dist) * 1. / residue_cost; - ++rd_model->num; - rd_model->dist_sum += dist; - rd_model->ld_sum += ld; - rd_model->sse_sum += sse; - rd_model->sse_sse_sum += sse * sse; - rd_model->sse_ld_sum += sse * ld; - } -} - -static void inter_modes_info_push(InterModesInfo *inter_modes_info, - int mode_rate, int64_t sse, int64_t est_rd, - const MB_MODE_INFO *mbmi) { - const int num = inter_modes_info->num; - assert(num < MAX_INTER_MODES); - inter_modes_info->mbmi_arr[num] = *mbmi; - inter_modes_info->mode_rate_arr[num] = mode_rate; - inter_modes_info->sse_arr[num] = sse; - inter_modes_info->est_rd_arr[num] = est_rd; - ++inter_modes_info->num; -} - -static int compare_rd_idx_pair(const void *a, const void *b) { - if (((RdIdxPair *)a)->rd == ((RdIdxPair *)b)->rd) { - return 0; - } else if (((const RdIdxPair *)a)->rd > ((const RdIdxPair *)b)->rd) { - return 1; - } else { - return -1; - } -} - -static void inter_modes_info_sort(const InterModesInfo *inter_modes_info, - RdIdxPair *rd_idx_pair_arr) { - if (inter_modes_info->num == 0) { - return; - } - for (int i = 0; i < inter_modes_info->num; ++i) { - rd_idx_pair_arr[i].idx = i; - rd_idx_pair_arr[i].rd = inter_modes_info->est_rd_arr[i]; - } - qsort(rd_idx_pair_arr, inter_modes_info->num, sizeof(rd_idx_pair_arr[0]), - compare_rd_idx_pair); -} -#endif // CONFIG_COLLECT_INTER_MODE_RD_STATS - -static INLINE int write_uniform_cost(int n, int v) { - const int l = get_unsigned_bits(n); - const int m = (1 << l) - n; - if (l == 0) return 0; - if (v < m) - return av1_cost_literal(l - 1); - else - return av1_cost_literal(l); -} - -// Similar to store_cfl_required(), but for use during the RDO process, -// where we haven't yet determined whether this block uses CfL. 
-static INLINE CFL_ALLOWED_TYPE store_cfl_required_rdo(const AV1_COMMON *cm, - const MACROBLOCK *x) { - const MACROBLOCKD *xd = &x->e_mbd; - - if (cm->seq_params.monochrome || x->skip_chroma_rd) return CFL_DISALLOWED; - - if (!xd->cfl.is_chroma_reference) { - // For non-chroma-reference blocks, we should always store the luma pixels, - // in case the corresponding chroma-reference block uses CfL. - // Note that this can only happen for block sizes which are <8 on - // their shortest side, as otherwise they would be chroma reference - // blocks. - return CFL_ALLOWED; - } - - // For chroma reference blocks, we should store data in the encoder iff we're - // allowed to try out CfL. - return is_cfl_allowed(xd); -} - -// constants for prune 1 and prune 2 decision boundaries -#define FAST_EXT_TX_CORR_MID 0.0 -#define FAST_EXT_TX_EDST_MID 0.1 -#define FAST_EXT_TX_CORR_MARGIN 0.5 -#define FAST_EXT_TX_EDST_MARGIN 0.3 - -static int inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x, - RD_STATS *rd_stats, BLOCK_SIZE bsize, - int64_t ref_best_rd, FAST_TX_SEARCH_MODE ftxs_mode); - -static unsigned pixel_dist_visible_only( - const AV1_COMP *const cpi, const MACROBLOCK *x, const uint8_t *src, - const int src_stride, const uint8_t *dst, const int dst_stride, - const BLOCK_SIZE tx_bsize, int txb_rows, int txb_cols, int visible_rows, - int visible_cols) { - unsigned sse; - - if (txb_rows == visible_rows && txb_cols == visible_cols) { - cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse); - return sse; - } - const MACROBLOCKD *xd = &x->e_mbd; - - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - uint64_t sse64 = aom_highbd_sse_odd_size(src, src_stride, dst, dst_stride, - visible_cols, visible_rows); - return (unsigned int)ROUND_POWER_OF_TWO(sse64, (xd->bd - 8) * 2); - } - sse = aom_sse_odd_size(src, src_stride, dst, dst_stride, visible_cols, - visible_rows); - return sse; -} - -#if CONFIG_DIST_8X8 -static uint64_t cdef_dist_8x8_16bit(uint16_t *dst, int dstride, 
uint16_t *src, - int sstride, int coeff_shift) { - uint64_t svar = 0; - uint64_t dvar = 0; - uint64_t sum_s = 0; - uint64_t sum_d = 0; - uint64_t sum_s2 = 0; - uint64_t sum_d2 = 0; - uint64_t sum_sd = 0; - uint64_t dist = 0; - - int i, j; - for (i = 0; i < 8; i++) { - for (j = 0; j < 8; j++) { - sum_s += src[i * sstride + j]; - sum_d += dst[i * dstride + j]; - sum_s2 += src[i * sstride + j] * src[i * sstride + j]; - sum_d2 += dst[i * dstride + j] * dst[i * dstride + j]; - sum_sd += src[i * sstride + j] * dst[i * dstride + j]; - } - } - /* Compute the variance -- the calculation cannot go negative. */ - svar = sum_s2 - ((sum_s * sum_s + 32) >> 6); - dvar = sum_d2 - ((sum_d * sum_d + 32) >> 6); - - // Tuning of jm's original dering distortion metric used in CDEF tool, - // suggested by jm - const uint64_t a = 4; - const uint64_t b = 2; - const uint64_t c1 = (400 * a << 2 * coeff_shift); - const uint64_t c2 = (b * 20000 * a * a << 4 * coeff_shift); - - dist = (uint64_t)floor(.5 + (sum_d2 + sum_s2 - 2 * sum_sd) * .5 * - (svar + dvar + c1) / - (sqrt(svar * (double)dvar + c2))); - - // Calibrate dist to have similar rate for the same QP with MSE only - // distortion (as in master branch) - dist = (uint64_t)((float)dist * 0.75); - - return dist; -} - -static int od_compute_var_4x4(uint16_t *x, int stride) { - int sum; - int s2; - int i; - sum = 0; - s2 = 0; - for (i = 0; i < 4; i++) { - int j; - for (j = 0; j < 4; j++) { - int t; - - t = x[i * stride + j]; - sum += t; - s2 += t * t; - } - } - - return (s2 - (sum * sum >> 4)) >> 4; -} - -/* OD_DIST_LP_MID controls the frequency weighting filter used for computing - the distortion. For a value X, the filter is [1 X 1]/(X + 2) and - is applied both horizontally and vertically. For X=5, the filter is - a good approximation for the OD_QM8_Q4_HVS quantization matrix. 
*/ -#define OD_DIST_LP_MID (5) -#define OD_DIST_LP_NORM (OD_DIST_LP_MID + 2) - -static double od_compute_dist_8x8(int use_activity_masking, uint16_t *x, - uint16_t *y, od_coeff *e_lp, int stride) { - double sum; - int min_var; - double mean_var; - double var_stat; - double activity; - double calibration; - int i; - int j; - double vardist; - - vardist = 0; - -#if 1 - min_var = INT_MAX; - mean_var = 0; - for (i = 0; i < 3; i++) { - for (j = 0; j < 3; j++) { - int varx; - int vary; - varx = od_compute_var_4x4(x + 2 * i * stride + 2 * j, stride); - vary = od_compute_var_4x4(y + 2 * i * stride + 2 * j, stride); - min_var = OD_MINI(min_var, varx); - mean_var += 1. / (1 + varx); - /* The cast to (double) is to avoid an overflow before the sqrt.*/ - vardist += varx - 2 * sqrt(varx * (double)vary) + vary; - } - } - /* We use a different variance statistic depending on whether activity - masking is used, since the harmonic mean appeared slightly worse with - masking off. The calibration constant just ensures that we preserve the - rate compared to activity=1. */ - if (use_activity_masking) { - calibration = 1.95; - var_stat = 9. / mean_var; - } else { - calibration = 1.62; - var_stat = min_var; - } - /* 1.62 is a calibration constant, 0.25 is a noise floor and 1/6 is the - activity masking constant. */ - activity = calibration * pow(.25 + var_stat, -1. / 6); -#else - activity = 1; -#endif // 1 - sum = 0; - for (i = 0; i < 8; i++) { - for (j = 0; j < 8; j++) - sum += e_lp[i * stride + j] * (double)e_lp[i * stride + j]; - } - /* Normalize the filter to unit DC response. */ - sum *= 1. 
/ (OD_DIST_LP_NORM * OD_DIST_LP_NORM * OD_DIST_LP_NORM * - OD_DIST_LP_NORM); - return activity * activity * (sum + vardist); -} - -// Note : Inputs x and y are in a pixel domain -static double od_compute_dist_common(int activity_masking, uint16_t *x, - uint16_t *y, int bsize_w, int bsize_h, - int qindex, od_coeff *tmp, - od_coeff *e_lp) { - int i, j; - double sum = 0; - const int mid = OD_DIST_LP_MID; - - for (j = 0; j < bsize_w; j++) { - e_lp[j] = mid * tmp[j] + 2 * tmp[bsize_w + j]; - e_lp[(bsize_h - 1) * bsize_w + j] = mid * tmp[(bsize_h - 1) * bsize_w + j] + - 2 * tmp[(bsize_h - 2) * bsize_w + j]; - } - for (i = 1; i < bsize_h - 1; i++) { - for (j = 0; j < bsize_w; j++) { - e_lp[i * bsize_w + j] = mid * tmp[i * bsize_w + j] + - tmp[(i - 1) * bsize_w + j] + - tmp[(i + 1) * bsize_w + j]; - } - } - for (i = 0; i < bsize_h; i += 8) { - for (j = 0; j < bsize_w; j += 8) { - sum += od_compute_dist_8x8(activity_masking, &x[i * bsize_w + j], - &y[i * bsize_w + j], &e_lp[i * bsize_w + j], - bsize_w); - } - } - /* Scale according to linear regression against SSE, for 8x8 blocks. */ - if (activity_masking) { - sum *= 2.2 + (1.7 - 2.2) * (qindex - 99) / (210 - 99) + - (qindex < 99 ? 2.5 * (qindex - 99) / 99 * (qindex - 99) / 99 : 0); - } else { - sum *= qindex >= 128 - ? 1.4 + (0.9 - 1.4) * (qindex - 128) / (209 - 128) - : qindex <= 43 ? 
1.5 + (2.0 - 1.5) * (qindex - 43) / (16 - 43) - : 1.5 + (1.4 - 1.5) * (qindex - 43) / (128 - 43); - } - - return sum; -} - -static double od_compute_dist(uint16_t *x, uint16_t *y, int bsize_w, - int bsize_h, int qindex) { - assert(bsize_w >= 8 && bsize_h >= 8); - - int activity_masking = 0; - - int i, j; - DECLARE_ALIGNED(16, od_coeff, e[MAX_SB_SQUARE]); - DECLARE_ALIGNED(16, od_coeff, tmp[MAX_SB_SQUARE]); - DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_SB_SQUARE]); - for (i = 0; i < bsize_h; i++) { - for (j = 0; j < bsize_w; j++) { - e[i * bsize_w + j] = x[i * bsize_w + j] - y[i * bsize_w + j]; - } - } - int mid = OD_DIST_LP_MID; - for (i = 0; i < bsize_h; i++) { - tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1]; - tmp[i * bsize_w + bsize_w - 1] = - mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2]; - for (j = 1; j < bsize_w - 1; j++) { - tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] + e[i * bsize_w + j - 1] + - e[i * bsize_w + j + 1]; - } - } - return od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h, - qindex, tmp, e_lp); -} - -static double od_compute_dist_diff(uint16_t *x, int16_t *e, int bsize_w, - int bsize_h, int qindex) { - assert(bsize_w >= 8 && bsize_h >= 8); - - int activity_masking = 0; - - DECLARE_ALIGNED(16, uint16_t, y[MAX_SB_SQUARE]); - DECLARE_ALIGNED(16, od_coeff, tmp[MAX_SB_SQUARE]); - DECLARE_ALIGNED(16, od_coeff, e_lp[MAX_SB_SQUARE]); - int i, j; - for (i = 0; i < bsize_h; i++) { - for (j = 0; j < bsize_w; j++) { - y[i * bsize_w + j] = x[i * bsize_w + j] - e[i * bsize_w + j]; - } - } - int mid = OD_DIST_LP_MID; - for (i = 0; i < bsize_h; i++) { - tmp[i * bsize_w] = mid * e[i * bsize_w] + 2 * e[i * bsize_w + 1]; - tmp[i * bsize_w + bsize_w - 1] = - mid * e[i * bsize_w + bsize_w - 1] + 2 * e[i * bsize_w + bsize_w - 2]; - for (j = 1; j < bsize_w - 1; j++) { - tmp[i * bsize_w + j] = mid * e[i * bsize_w + j] + e[i * bsize_w + j - 1] + - e[i * bsize_w + j + 1]; - } - } - return 
od_compute_dist_common(activity_masking, x, y, bsize_w, bsize_h, - qindex, tmp, e_lp); -} - -int64_t av1_dist_8x8(const AV1_COMP *const cpi, const MACROBLOCK *x, - const uint8_t *src, int src_stride, const uint8_t *dst, - int dst_stride, const BLOCK_SIZE tx_bsize, int bsw, - int bsh, int visible_w, int visible_h, int qindex) { - int64_t d = 0; - int i, j; - const MACROBLOCKD *xd = &x->e_mbd; - - DECLARE_ALIGNED(16, uint16_t, orig[MAX_SB_SQUARE]); - DECLARE_ALIGNED(16, uint16_t, rec[MAX_SB_SQUARE]); - - assert(bsw >= 8); - assert(bsh >= 8); - assert((bsw & 0x07) == 0); - assert((bsh & 0x07) == 0); - - if (x->tune_metric == AOM_TUNE_CDEF_DIST || - x->tune_metric == AOM_TUNE_DAALA_DIST) { - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - for (j = 0; j < bsh; j++) - for (i = 0; i < bsw; i++) - orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i]; - - if ((bsw == visible_w) && (bsh == visible_h)) { - for (j = 0; j < bsh; j++) - for (i = 0; i < bsw; i++) - rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i]; - } else { - for (j = 0; j < visible_h; j++) - for (i = 0; i < visible_w; i++) - rec[j * bsw + i] = CONVERT_TO_SHORTPTR(dst)[j * dst_stride + i]; - - if (visible_w < bsw) { - for (j = 0; j < bsh; j++) - for (i = visible_w; i < bsw; i++) - rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i]; - } - - if (visible_h < bsh) { - for (j = visible_h; j < bsh; j++) - for (i = 0; i < bsw; i++) - rec[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i]; - } - } - } else { - for (j = 0; j < bsh; j++) - for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i]; - - if ((bsw == visible_w) && (bsh == visible_h)) { - for (j = 0; j < bsh; j++) - for (i = 0; i < bsw; i++) rec[j * bsw + i] = dst[j * dst_stride + i]; - } else { - for (j = 0; j < visible_h; j++) - for (i = 0; i < visible_w; i++) - rec[j * bsw + i] = dst[j * dst_stride + i]; - - if (visible_w < bsw) { - for (j = 0; j < bsh; j++) - for (i = visible_w; i < bsw; 
i++) - rec[j * bsw + i] = src[j * src_stride + i]; - } - - if (visible_h < bsh) { - for (j = visible_h; j < bsh; j++) - for (i = 0; i < bsw; i++) - rec[j * bsw + i] = src[j * src_stride + i]; - } - } - } - } - - if (x->tune_metric == AOM_TUNE_DAALA_DIST) { - d = (int64_t)od_compute_dist(orig, rec, bsw, bsh, qindex); - } else if (x->tune_metric == AOM_TUNE_CDEF_DIST) { - int coeff_shift = AOMMAX(xd->bd - 8, 0); - - for (i = 0; i < bsh; i += 8) { - for (j = 0; j < bsw; j += 8) { - d += cdef_dist_8x8_16bit(&rec[i * bsw + j], bsw, &orig[i * bsw + j], - bsw, coeff_shift); - } - } - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) - d = ((uint64_t)d) >> 2 * coeff_shift; - } else { - // Otherwise, MSE by default - d = pixel_dist_visible_only(cpi, x, src, src_stride, dst, dst_stride, - tx_bsize, bsh, bsw, visible_h, visible_w); - } - - return d; -} - -static int64_t dist_8x8_diff(const MACROBLOCK *x, const uint8_t *src, - int src_stride, const int16_t *diff, - int diff_stride, int bsw, int bsh, int visible_w, - int visible_h, int qindex) { - int64_t d = 0; - int i, j; - const MACROBLOCKD *xd = &x->e_mbd; - - DECLARE_ALIGNED(16, uint16_t, orig[MAX_SB_SQUARE]); - DECLARE_ALIGNED(16, int16_t, diff16[MAX_SB_SQUARE]); - - assert(bsw >= 8); - assert(bsh >= 8); - assert((bsw & 0x07) == 0); - assert((bsh & 0x07) == 0); - - if (x->tune_metric == AOM_TUNE_CDEF_DIST || - x->tune_metric == AOM_TUNE_DAALA_DIST) { - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - for (j = 0; j < bsh; j++) - for (i = 0; i < bsw; i++) - orig[j * bsw + i] = CONVERT_TO_SHORTPTR(src)[j * src_stride + i]; - } else { - for (j = 0; j < bsh; j++) - for (i = 0; i < bsw; i++) orig[j * bsw + i] = src[j * src_stride + i]; - } - - if ((bsw == visible_w) && (bsh == visible_h)) { - for (j = 0; j < bsh; j++) - for (i = 0; i < bsw; i++) - diff16[j * bsw + i] = diff[j * diff_stride + i]; - } else { - for (j = 0; j < visible_h; j++) - for (i = 0; i < visible_w; i++) - diff16[j * bsw + i] = diff[j * diff_stride + i]; - 
- if (visible_w < bsw) { - for (j = 0; j < bsh; j++) - for (i = visible_w; i < bsw; i++) diff16[j * bsw + i] = 0; - } - - if (visible_h < bsh) { - for (j = visible_h; j < bsh; j++) - for (i = 0; i < bsw; i++) diff16[j * bsw + i] = 0; - } - } - } - - if (x->tune_metric == AOM_TUNE_DAALA_DIST) { - d = (int64_t)od_compute_dist_diff(orig, diff16, bsw, bsh, qindex); - } else if (x->tune_metric == AOM_TUNE_CDEF_DIST) { - int coeff_shift = AOMMAX(xd->bd - 8, 0); - DECLARE_ALIGNED(16, uint16_t, dst16[MAX_SB_SQUARE]); - - for (i = 0; i < bsh; i++) { - for (j = 0; j < bsw; j++) { - dst16[i * bsw + j] = orig[i * bsw + j] - diff16[i * bsw + j]; - } - } - - for (i = 0; i < bsh; i += 8) { - for (j = 0; j < bsw; j += 8) { - d += cdef_dist_8x8_16bit(&dst16[i * bsw + j], bsw, &orig[i * bsw + j], - bsw, coeff_shift); - } - } - // Don't scale 'd' for HBD since it will be done by caller side for diff - // input - } else { - // Otherwise, MSE by default - d = aom_sum_squares_2d_i16(diff, diff_stride, visible_w, visible_h); - } - - return d; -} -#endif // CONFIG_DIST_8X8 - -static void get_energy_distribution_fine(const AV1_COMP *cpi, BLOCK_SIZE bsize, - const uint8_t *src, int src_stride, - const uint8_t *dst, int dst_stride, - int need_4th, double *hordist, - double *verdist) { - const int bw = block_size_wide[bsize]; - const int bh = block_size_high[bsize]; - unsigned int esq[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - - if (bsize < BLOCK_16X16 || (bsize >= BLOCK_4X16 && bsize <= BLOCK_32X8)) { - // Special cases: calculate 'esq' values manually, as we don't have 'vf' - // functions for the 16 (very small) sub-blocks of this block. - const int w_shift = (bw == 4) ? 0 : (bw == 8) ? 1 : (bw == 16) ? 2 : 3; - const int h_shift = (bh == 4) ? 0 : (bh == 8) ? 1 : (bh == 16) ? 
2 : 3; - assert(bw <= 32); - assert(bh <= 32); - assert(((bw - 1) >> w_shift) + (((bh - 1) >> h_shift) << 2) == 15); - if (cpi->common.seq_params.use_highbitdepth) { - const uint16_t *src16 = CONVERT_TO_SHORTPTR(src); - const uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst); - for (int i = 0; i < bh; ++i) - for (int j = 0; j < bw; ++j) { - const int index = (j >> w_shift) + ((i >> h_shift) << 2); - esq[index] += - (src16[j + i * src_stride] - dst16[j + i * dst_stride]) * - (src16[j + i * src_stride] - dst16[j + i * dst_stride]); - } - } else { - for (int i = 0; i < bh; ++i) - for (int j = 0; j < bw; ++j) { - const int index = (j >> w_shift) + ((i >> h_shift) << 2); - esq[index] += (src[j + i * src_stride] - dst[j + i * dst_stride]) * - (src[j + i * src_stride] - dst[j + i * dst_stride]); - } - } - } else { // Calculate 'esq' values using 'vf' functions on the 16 sub-blocks. - const int f_index = - (bsize < BLOCK_SIZES) ? bsize - BLOCK_16X16 : bsize - BLOCK_8X16; - assert(f_index >= 0 && f_index < BLOCK_SIZES_ALL); - const BLOCK_SIZE subsize = (BLOCK_SIZE)f_index; - assert(block_size_wide[bsize] == 4 * block_size_wide[subsize]); - assert(block_size_high[bsize] == 4 * block_size_high[subsize]); - cpi->fn_ptr[subsize].vf(src, src_stride, dst, dst_stride, &esq[0]); - cpi->fn_ptr[subsize].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride, - &esq[1]); - cpi->fn_ptr[subsize].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride, - &esq[2]); - cpi->fn_ptr[subsize].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4, - dst_stride, &esq[3]); - src += bh / 4 * src_stride; - dst += bh / 4 * dst_stride; - - cpi->fn_ptr[subsize].vf(src, src_stride, dst, dst_stride, &esq[4]); - cpi->fn_ptr[subsize].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride, - &esq[5]); - cpi->fn_ptr[subsize].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride, - &esq[6]); - cpi->fn_ptr[subsize].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4, - dst_stride, &esq[7]); - src += bh / 4 * src_stride; - dst 
+= bh / 4 * dst_stride; - - cpi->fn_ptr[subsize].vf(src, src_stride, dst, dst_stride, &esq[8]); - cpi->fn_ptr[subsize].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride, - &esq[9]); - cpi->fn_ptr[subsize].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride, - &esq[10]); - cpi->fn_ptr[subsize].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4, - dst_stride, &esq[11]); - src += bh / 4 * src_stride; - dst += bh / 4 * dst_stride; - - cpi->fn_ptr[subsize].vf(src, src_stride, dst, dst_stride, &esq[12]); - cpi->fn_ptr[subsize].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride, - &esq[13]); - cpi->fn_ptr[subsize].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride, - &esq[14]); - cpi->fn_ptr[subsize].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4, - dst_stride, &esq[15]); - } - - double total = (double)esq[0] + esq[1] + esq[2] + esq[3] + esq[4] + esq[5] + - esq[6] + esq[7] + esq[8] + esq[9] + esq[10] + esq[11] + - esq[12] + esq[13] + esq[14] + esq[15]; - if (total > 0) { - const double e_recip = 1.0 / total; - hordist[0] = ((double)esq[0] + esq[4] + esq[8] + esq[12]) * e_recip; - hordist[1] = ((double)esq[1] + esq[5] + esq[9] + esq[13]) * e_recip; - hordist[2] = ((double)esq[2] + esq[6] + esq[10] + esq[14]) * e_recip; - if (need_4th) { - hordist[3] = ((double)esq[3] + esq[7] + esq[11] + esq[15]) * e_recip; - } - verdist[0] = ((double)esq[0] + esq[1] + esq[2] + esq[3]) * e_recip; - verdist[1] = ((double)esq[4] + esq[5] + esq[6] + esq[7]) * e_recip; - verdist[2] = ((double)esq[8] + esq[9] + esq[10] + esq[11]) * e_recip; - if (need_4th) { - verdist[3] = ((double)esq[12] + esq[13] + esq[14] + esq[15]) * e_recip; - } - } else { - hordist[0] = verdist[0] = 0.25; - hordist[1] = verdist[1] = 0.25; - hordist[2] = verdist[2] = 0.25; - if (need_4th) { - hordist[3] = verdist[3] = 0.25; - } - } -} - -static int adst_vs_flipadst(const AV1_COMP *cpi, BLOCK_SIZE bsize, - const uint8_t *src, int src_stride, - const uint8_t *dst, int dst_stride) { - int prune_bitmask = 0; - 
double svm_proj_h = 0, svm_proj_v = 0; - double hdist[3] = { 0, 0, 0 }, vdist[3] = { 0, 0, 0 }; - get_energy_distribution_fine(cpi, bsize, src, src_stride, dst, dst_stride, 0, - hdist, vdist); - - svm_proj_v = vdist[0] * ADST_FLIP_SVM[0] + vdist[1] * ADST_FLIP_SVM[1] + - vdist[2] * ADST_FLIP_SVM[2] + ADST_FLIP_SVM[3]; - svm_proj_h = hdist[0] * ADST_FLIP_SVM[4] + hdist[1] * ADST_FLIP_SVM[5] + - hdist[2] * ADST_FLIP_SVM[6] + ADST_FLIP_SVM[7]; - if (svm_proj_v > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN) - prune_bitmask |= 1 << FLIPADST_1D; - else if (svm_proj_v < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN) - prune_bitmask |= 1 << ADST_1D; - - if (svm_proj_h > FAST_EXT_TX_EDST_MID + FAST_EXT_TX_EDST_MARGIN) - prune_bitmask |= 1 << (FLIPADST_1D + 8); - else if (svm_proj_h < FAST_EXT_TX_EDST_MID - FAST_EXT_TX_EDST_MARGIN) - prune_bitmask |= 1 << (ADST_1D + 8); - - return prune_bitmask; -} - -static void get_horver_correlation(const int16_t *diff, int stride, int w, - int h, double *hcorr, double *vcorr) { - // Returns hor/ver correlation coefficient - const int num = (h - 1) * (w - 1); - double num_r; - int i, j; - int64_t xy_sum = 0, xz_sum = 0; - int64_t x_sum = 0, y_sum = 0, z_sum = 0; - int64_t x2_sum = 0, y2_sum = 0, z2_sum = 0; - double x_var_n, y_var_n, z_var_n, xy_var_n, xz_var_n; - *hcorr = *vcorr = 1; - - assert(num > 0); - num_r = 1.0 / num; - for (i = 1; i < h; ++i) { - for (j = 1; j < w; ++j) { - const int16_t x = diff[i * stride + j]; - const int16_t y = diff[i * stride + j - 1]; - const int16_t z = diff[(i - 1) * stride + j]; - xy_sum += x * y; - xz_sum += x * z; - x_sum += x; - y_sum += y; - z_sum += z; - x2_sum += x * x; - y2_sum += y * y; - z2_sum += z * z; - } - } - x_var_n = x2_sum - (x_sum * x_sum) * num_r; - y_var_n = y2_sum - (y_sum * y_sum) * num_r; - z_var_n = z2_sum - (z_sum * z_sum) * num_r; - xy_var_n = xy_sum - (x_sum * y_sum) * num_r; - xz_var_n = xz_sum - (x_sum * z_sum) * num_r; - if (x_var_n > 0 && y_var_n > 0) { - *hcorr = 
xy_var_n / sqrt(x_var_n * y_var_n); - *hcorr = *hcorr < 0 ? 0 : *hcorr; - } - if (x_var_n > 0 && z_var_n > 0) { - *vcorr = xz_var_n / sqrt(x_var_n * z_var_n); - *vcorr = *vcorr < 0 ? 0 : *vcorr; - } -} - -static int dct_vs_idtx(const int16_t *diff, int stride, int w, int h) { - double hcorr, vcorr; - int prune_bitmask = 0; - get_horver_correlation(diff, stride, w, h, &hcorr, &vcorr); - - if (vcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN) - prune_bitmask |= 1 << IDTX_1D; - else if (vcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN) - prune_bitmask |= 1 << DCT_1D; - - if (hcorr > FAST_EXT_TX_CORR_MID + FAST_EXT_TX_CORR_MARGIN) - prune_bitmask |= 1 << (IDTX_1D + 8); - else if (hcorr < FAST_EXT_TX_CORR_MID - FAST_EXT_TX_CORR_MARGIN) - prune_bitmask |= 1 << (DCT_1D + 8); - return prune_bitmask; -} - -// Performance drop: 0.5%, Speed improvement: 24% -static int prune_two_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize, - MACROBLOCK *x, const MACROBLOCKD *xd, - int adst_flipadst, int dct_idtx) { - int prune = 0; - - if (adst_flipadst) { - const struct macroblock_plane *const p = &x->plane[0]; - const struct macroblockd_plane *const pd = &xd->plane[0]; - prune |= adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride, - pd->dst.buf, pd->dst.stride); - } - if (dct_idtx) { - av1_subtract_plane(x, bsize, 0); - const struct macroblock_plane *const p = &x->plane[0]; - const int bw = block_size_wide[bsize]; - const int bh = block_size_high[bsize]; - prune |= dct_vs_idtx(p->src_diff, bw, bw, bh); - } - - return prune; -} - -// Performance drop: 0.3%, Speed improvement: 5% -static int prune_one_for_sby(const AV1_COMP *cpi, BLOCK_SIZE bsize, - const MACROBLOCK *x, const MACROBLOCKD *xd) { - const struct macroblock_plane *const p = &x->plane[0]; - const struct macroblockd_plane *const pd = &xd->plane[0]; - return adst_vs_flipadst(cpi, bsize, p->src.buf, p->src.stride, pd->dst.buf, - pd->dst.stride); -} - -// 1D Transforms used in inter set, this needs to be changed if 
-// ext_tx_used_inter is changed -static const int ext_tx_used_inter_1D[EXT_TX_SETS_INTER][TX_TYPES_1D] = { - { 1, 0, 0, 0 }, - { 1, 1, 1, 1 }, - { 1, 1, 1, 1 }, - { 1, 0, 0, 1 }, -}; - -static void get_energy_distribution_finer(const int16_t *diff, int stride, - int bw, int bh, float *hordist, - float *verdist) { - // First compute downscaled block energy values (esq); downscale factors - // are defined by w_shift and h_shift. - unsigned int esq[256]; - const int w_shift = bw <= 8 ? 0 : 1; - const int h_shift = bh <= 8 ? 0 : 1; - const int esq_w = bw >> w_shift; - const int esq_h = bh >> h_shift; - const int esq_sz = esq_w * esq_h; - int i, j; - memset(esq, 0, esq_sz * sizeof(esq[0])); - if (w_shift) { - for (i = 0; i < bh; i++) { - unsigned int *cur_esq_row = esq + (i >> h_shift) * esq_w; - const int16_t *cur_diff_row = diff + i * stride; - for (j = 0; j < bw; j += 2) { - cur_esq_row[j >> 1] += (cur_diff_row[j] * cur_diff_row[j] + - cur_diff_row[j + 1] * cur_diff_row[j + 1]); - } - } - } else { - for (i = 0; i < bh; i++) { - unsigned int *cur_esq_row = esq + (i >> h_shift) * esq_w; - const int16_t *cur_diff_row = diff + i * stride; - for (j = 0; j < bw; j++) { - cur_esq_row[j] += cur_diff_row[j] * cur_diff_row[j]; - } - } - } - - uint64_t total = 0; - for (i = 0; i < esq_sz; i++) total += esq[i]; - - // Output hordist and verdist arrays are normalized 1D projections of esq - if (total == 0) { - float hor_val = 1.0f / esq_w; - for (j = 0; j < esq_w - 1; j++) hordist[j] = hor_val; - float ver_val = 1.0f / esq_h; - for (i = 0; i < esq_h - 1; i++) verdist[i] = ver_val; - return; - } - - const float e_recip = 1.0f / (float)total; - memset(hordist, 0, (esq_w - 1) * sizeof(hordist[0])); - memset(verdist, 0, (esq_h - 1) * sizeof(verdist[0])); - const unsigned int *cur_esq_row; - for (i = 0; i < esq_h - 1; i++) { - cur_esq_row = esq + i * esq_w; - for (j = 0; j < esq_w - 1; j++) { - hordist[j] += (float)cur_esq_row[j]; - verdist[i] += (float)cur_esq_row[j]; - } - 
verdist[i] += (float)cur_esq_row[j]; - } - cur_esq_row = esq + i * esq_w; - for (j = 0; j < esq_w - 1; j++) hordist[j] += (float)cur_esq_row[j]; - - for (j = 0; j < esq_w - 1; j++) hordist[j] *= e_recip; - for (i = 0; i < esq_h - 1; i++) verdist[i] *= e_recip; -} - -// Similar to get_horver_correlation, but also takes into account first -// row/column, when computing horizontal/vertical correlation. -static void get_horver_correlation_full(const int16_t *diff, int stride, int w, - int h, float *hcorr, float *vcorr) { - const float num_hor = (float)(h * (w - 1)); - const float num_ver = (float)((h - 1) * w); - int i, j; - - // The following notation is used: - // x - current pixel - // y - left neighbor pixel - // z - top neighbor pixel - int64_t xy_sum = 0, xz_sum = 0; - int64_t xhor_sum = 0, xver_sum = 0, y_sum = 0, z_sum = 0; - int64_t x2hor_sum = 0, x2ver_sum = 0, y2_sum = 0, z2_sum = 0; - - int16_t x, y, z; - for (j = 1; j < w; ++j) { - x = diff[j]; - y = diff[j - 1]; - xy_sum += x * y; - xhor_sum += x; - y_sum += y; - x2hor_sum += x * x; - y2_sum += y * y; - } - for (i = 1; i < h; ++i) { - x = diff[i * stride]; - z = diff[(i - 1) * stride]; - xz_sum += x * z; - xver_sum += x; - z_sum += z; - x2ver_sum += x * x; - z2_sum += z * z; - for (j = 1; j < w; ++j) { - x = diff[i * stride + j]; - y = diff[i * stride + j - 1]; - z = diff[(i - 1) * stride + j]; - xy_sum += x * y; - xz_sum += x * z; - xhor_sum += x; - xver_sum += x; - y_sum += y; - z_sum += z; - x2hor_sum += x * x; - x2ver_sum += x * x; - y2_sum += y * y; - z2_sum += z * z; - } - } - const float xhor_var_n = x2hor_sum - (xhor_sum * xhor_sum) / num_hor; - const float y_var_n = y2_sum - (y_sum * y_sum) / num_hor; - const float xy_var_n = xy_sum - (xhor_sum * y_sum) / num_hor; - const float xver_var_n = x2ver_sum - (xver_sum * xver_sum) / num_ver; - const float z_var_n = z2_sum - (z_sum * z_sum) / num_ver; - const float xz_var_n = xz_sum - (xver_sum * z_sum) / num_ver; - - *hcorr = *vcorr = 1; - if 
(xhor_var_n > 0 && y_var_n > 0) { - *hcorr = xy_var_n / sqrtf(xhor_var_n * y_var_n); - *hcorr = *hcorr < 0 ? 0 : *hcorr; - } - if (xver_var_n > 0 && z_var_n > 0) { - *vcorr = xz_var_n / sqrtf(xver_var_n * z_var_n); - *vcorr = *vcorr < 0 ? 0 : *vcorr; - } -} - -// Transforms raw scores into a probability distribution across 16 TX types -static void score_2D_transform_pow8(float *scores_2D, float shift) { - float sum = 0.0f; - int i; - - for (i = 0; i < 16; i++) { - float v, v2, v4; - v = AOMMAX(scores_2D[i] + shift, 0.0f); - v2 = v * v; - v4 = v2 * v2; - scores_2D[i] = v4 * v4; - sum += scores_2D[i]; - } - for (i = 0; i < 16; i++) scores_2D[i] /= sum; -} - -// These thresholds were calibrated to provide a certain number of TX types -// pruned by the model on average, i.e. selecting a threshold with index i -// will lead to pruning i+1 TX types on average -static const float *prune_2D_adaptive_thresholds[] = { - // TX_4X4 - (float[]){ 0.00549f, 0.01306f, 0.02039f, 0.02747f, 0.03406f, 0.04065f, - 0.04724f, 0.05383f, 0.06067f, 0.06799f, 0.07605f, 0.08533f, - 0.09778f, 0.11780f }, - // TX_8X8 - (float[]){ 0.00037f, 0.00183f, 0.00525f, 0.01038f, 0.01697f, 0.02502f, - 0.03381f, 0.04333f, 0.05286f, 0.06287f, 0.07434f, 0.08850f, - 0.10803f, 0.14124f }, - // TX_16X16 - (float[]){ 0.01404f, 0.02820f, 0.04211f, 0.05164f, 0.05798f, 0.06335f, - 0.06897f, 0.07629f, 0.08875f, 0.11169f }, - // TX_32X32 - NULL, - // TX_64X64 - NULL, - // TX_4X8 - (float[]){ 0.00183f, 0.00745f, 0.01428f, 0.02185f, 0.02966f, 0.03723f, - 0.04456f, 0.05188f, 0.05920f, 0.06702f, 0.07605f, 0.08704f, - 0.10168f, 0.12585f }, - // TX_8X4 - (float[]){ 0.00085f, 0.00476f, 0.01135f, 0.01892f, 0.02698f, 0.03528f, - 0.04358f, 0.05164f, 0.05994f, 0.06848f, 0.07849f, 0.09021f, - 0.10583f, 0.13123f }, - // TX_8X16 - (float[]){ 0.00037f, 0.00232f, 0.00671f, 0.01257f, 0.01965f, 0.02722f, - 0.03552f, 0.04382f, 0.05237f, 0.06189f, 0.07336f, 0.08728f, - 0.10730f, 0.14221f }, - // TX_16X8 - (float[]){ 0.00061f, 0.00330f, 
0.00818f, 0.01453f, 0.02185f, 0.02966f, - 0.03772f, 0.04578f, 0.05383f, 0.06262f, 0.07288f, 0.08582f, - 0.10339f, 0.13464f }, - // TX_16X32 - NULL, - // TX_32X16 - NULL, - // TX_32X64 - NULL, - // TX_64X32 - NULL, - // TX_4X16 - (float[]){ 0.00232f, 0.00671f, 0.01257f, 0.01941f, 0.02673f, 0.03430f, - 0.04211f, 0.04968f, 0.05750f, 0.06580f, 0.07507f, 0.08655f, - 0.10242f, 0.12878f }, - // TX_16X4 - (float[]){ 0.00110f, 0.00525f, 0.01208f, 0.01990f, 0.02795f, 0.03601f, - 0.04358f, 0.05115f, 0.05896f, 0.06702f, 0.07629f, 0.08752f, - 0.10217f, 0.12610f }, - // TX_8X32 - NULL, - // TX_32X8 - NULL, - // TX_16X64 - NULL, - // TX_64X16 - NULL, -}; - -static uint16_t prune_tx_2D(MACROBLOCK *x, BLOCK_SIZE bsize, TX_SIZE tx_size, - int blk_row, int blk_col, TxSetType tx_set_type, - TX_TYPE_PRUNE_MODE prune_mode) { - static const int tx_type_table_2D[16] = { - DCT_DCT, DCT_ADST, DCT_FLIPADST, V_DCT, - ADST_DCT, ADST_ADST, ADST_FLIPADST, V_ADST, - FLIPADST_DCT, FLIPADST_ADST, FLIPADST_FLIPADST, V_FLIPADST, - H_DCT, H_ADST, H_FLIPADST, IDTX - }; - if (tx_set_type != EXT_TX_SET_ALL16 && - tx_set_type != EXT_TX_SET_DTT9_IDTX_1DDCT) - return 0; - const NN_CONFIG *nn_config_hor = av1_tx_type_nnconfig_map_hor[tx_size]; - const NN_CONFIG *nn_config_ver = av1_tx_type_nnconfig_map_ver[tx_size]; - if (!nn_config_hor || !nn_config_ver) return 0; // Model not established yet. - - aom_clear_system_state(); - float hfeatures[16], vfeatures[16]; - float hscores[4], vscores[4]; - float scores_2D[16]; - const int bw = tx_size_wide[tx_size]; - const int bh = tx_size_high[tx_size]; - const int hfeatures_num = bw <= 8 ? bw : bw / 2; - const int vfeatures_num = bh <= 8 ? 
bh : bh / 2; - assert(hfeatures_num <= 16); - assert(vfeatures_num <= 16); - - const struct macroblock_plane *const p = &x->plane[0]; - const int diff_stride = block_size_wide[bsize]; - const int16_t *diff = p->src_diff + 4 * blk_row * diff_stride + 4 * blk_col; - get_energy_distribution_finer(diff, diff_stride, bw, bh, hfeatures, - vfeatures); - get_horver_correlation_full(diff, diff_stride, bw, bh, - &hfeatures[hfeatures_num - 1], - &vfeatures[vfeatures_num - 1]); - av1_nn_predict(hfeatures, nn_config_hor, hscores); - av1_nn_predict(vfeatures, nn_config_ver, vscores); - - float score_2D_average = 0.0f; - for (int i = 0; i < 4; i++) { - float *cur_scores_2D = scores_2D + i * 4; - cur_scores_2D[0] = vscores[i] * hscores[0]; - cur_scores_2D[1] = vscores[i] * hscores[1]; - cur_scores_2D[2] = vscores[i] * hscores[2]; - cur_scores_2D[3] = vscores[i] * hscores[3]; - score_2D_average += cur_scores_2D[0] + cur_scores_2D[1] + cur_scores_2D[2] + - cur_scores_2D[3]; - } - score_2D_average /= 16; - - const int prune_aggr_table[2][2] = { { 6, 4 }, { 10, 7 } }; - int pruning_aggressiveness = 1; - if (tx_set_type == EXT_TX_SET_ALL16) { - score_2D_transform_pow8(scores_2D, (10 - score_2D_average)); - pruning_aggressiveness = - prune_aggr_table[prune_mode - PRUNE_2D_ACCURATE][0]; - } else if (tx_set_type == EXT_TX_SET_DTT9_IDTX_1DDCT) { - score_2D_transform_pow8(scores_2D, (20 - score_2D_average)); - pruning_aggressiveness = - prune_aggr_table[prune_mode - PRUNE_2D_ACCURATE][1]; - } - - // Always keep the TX type with the highest score, prune all others with - // score below score_thresh. 
- int max_score_i = 0; - float max_score = 0.0f; - for (int i = 0; i < 16; i++) { - if (scores_2D[i] > max_score && - av1_ext_tx_used[tx_set_type][tx_type_table_2D[i]]) { - max_score = scores_2D[i]; - max_score_i = i; - } - } - - const float score_thresh = - prune_2D_adaptive_thresholds[tx_size][pruning_aggressiveness - 1]; - - uint16_t prune_bitmask = 0; - for (int i = 0; i < 16; i++) { - if (scores_2D[i] < score_thresh && i != max_score_i) - prune_bitmask |= (1 << tx_type_table_2D[i]); - } - return prune_bitmask; -} - -// ((prune >> vtx_tab[tx_type]) & 1) -static const uint16_t prune_v_mask[] = { - 0x0000, 0x0425, 0x108a, 0x14af, 0x4150, 0x4575, 0x51da, 0x55ff, - 0xaa00, 0xae25, 0xba8a, 0xbeaf, 0xeb50, 0xef75, 0xfbda, 0xffff, -}; - -// ((prune >> (htx_tab[tx_type] + 8)) & 1) -static const uint16_t prune_h_mask[] = { - 0x0000, 0x0813, 0x210c, 0x291f, 0x80e0, 0x88f3, 0xa1ec, 0xa9ff, - 0x5600, 0x5e13, 0x770c, 0x7f1f, 0xd6e0, 0xdef3, 0xf7ec, 0xffff, -}; - -static INLINE uint16_t gen_tx_search_prune_mask(int tx_search_prune) { - uint8_t prune_v = tx_search_prune & 0x0F; - uint8_t prune_h = (tx_search_prune >> 8) & 0x0F; - return (prune_v_mask[prune_v] & prune_h_mask[prune_h]); -} - -static void prune_tx(const AV1_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x, - const MACROBLOCKD *const xd, int tx_set_type) { - x->tx_search_prune[tx_set_type] = 0; - x->tx_split_prune_flag = 0; - const MB_MODE_INFO *mbmi = xd->mi[0]; - if (!is_inter_block(mbmi) || cpi->sf.tx_type_search.prune_mode == NO_PRUNE || - x->use_default_inter_tx_type || xd->lossless[mbmi->segment_id] || - x->cb_partition_scan) - return; - int tx_set = ext_tx_set_index[1][tx_set_type]; - assert(tx_set >= 0); - const int *tx_set_1D = ext_tx_used_inter_1D[tx_set]; - int prune = 0; - switch (cpi->sf.tx_type_search.prune_mode) { - case NO_PRUNE: return; - case PRUNE_ONE: - if (!(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) return; - prune = prune_one_for_sby(cpi, bsize, x, xd); - x->tx_search_prune[tx_set_type] = 
gen_tx_search_prune_mask(prune); - break; - case PRUNE_TWO: - if (!(tx_set_1D[FLIPADST_1D] & tx_set_1D[ADST_1D])) { - if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D])) return; - prune = prune_two_for_sby(cpi, bsize, x, xd, 0, 1); - } else if (!(tx_set_1D[DCT_1D] & tx_set_1D[IDTX_1D])) { - prune = prune_two_for_sby(cpi, bsize, x, xd, 1, 0); - } else { - prune = prune_two_for_sby(cpi, bsize, x, xd, 1, 1); - } - x->tx_search_prune[tx_set_type] = gen_tx_search_prune_mask(prune); - break; - case PRUNE_2D_ACCURATE: - case PRUNE_2D_FAST: break; - default: assert(0); - } -} - -static void model_rd_from_sse(const AV1_COMP *const cpi, - const MACROBLOCK *const x, BLOCK_SIZE plane_bsize, - int plane, int64_t sse, int num_samples, - int *rate, int64_t *dist) { - (void)num_samples; - const MACROBLOCKD *const xd = &x->e_mbd; - const struct macroblockd_plane *const pd = &xd->plane[plane]; - const int dequant_shift = - (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 : 3; - - // Fast approximate the modelling function. 
- if (cpi->sf.simple_model_rd_from_var) { - const int64_t square_error = sse; - int quantizer = pd->dequant_Q3[1] >> dequant_shift; - if (quantizer < 120) - *rate = (int)AOMMIN( - (square_error * (280 - quantizer)) >> (16 - AV1_PROB_COST_SHIFT), - INT_MAX); - else - *rate = 0; - assert(*rate >= 0); - *dist = (square_error * quantizer) >> 8; - } else { - av1_model_rd_from_var_lapndz(sse, num_pels_log2_lookup[plane_bsize], - pd->dequant_Q3[1] >> dequant_shift, rate, - dist); - } - *dist <<= 4; -} - -#if CONFIG_COLLECT_INTER_MODE_RD_STATS -static int64_t get_sse(const AV1_COMP *cpi, const MACROBLOCK *x) { - const AV1_COMMON *cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - const MACROBLOCKD *xd = &x->e_mbd; - const MB_MODE_INFO *mbmi = xd->mi[0]; - int64_t total_sse = 0; - for (int plane = 0; plane < num_planes; ++plane) { - const struct macroblock_plane *const p = &x->plane[plane]; - const struct macroblockd_plane *const pd = &xd->plane[plane]; - const BLOCK_SIZE bs = get_plane_block_size(mbmi->sb_type, pd->subsampling_x, - pd->subsampling_y); - unsigned int sse; - - if (x->skip_chroma_rd && plane) continue; - - cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, - &sse); - total_sse += sse; - } - total_sse <<= 4; - return total_sse; -} -#endif - -static void model_rd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bsize, - MACROBLOCK *x, MACROBLOCKD *xd, int plane_from, - int plane_to, int mi_row, int mi_col, - int *out_rate_sum, int64_t *out_dist_sum, - int *skip_txfm_sb, int64_t *skip_sse_sb, - int *plane_rate, int64_t *plane_sse, - int64_t *plane_dist) { - // Note our transform coeffs are 8 times an orthogonal transform. - // Hence quantizer step is also 8 times. To get effective quantizer - // we need to divide by 8 before sending to modeling function. 
- int plane; - (void)mi_row; - (void)mi_col; - const int ref = xd->mi[0]->ref_frame[0]; - - int64_t rate_sum = 0; - int64_t dist_sum = 0; - int64_t total_sse = 0; - - for (plane = plane_from; plane <= plane_to; ++plane) { - struct macroblock_plane *const p = &x->plane[plane]; - struct macroblockd_plane *const pd = &xd->plane[plane]; - const BLOCK_SIZE plane_bsize = - get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); - const int bw = block_size_wide[plane_bsize]; - const int bh = block_size_high[plane_bsize]; - int64_t sse; - int rate; - int64_t dist; - - if (x->skip_chroma_rd && plane) continue; - - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - sse = aom_highbd_sse(p->src.buf, p->src.stride, pd->dst.buf, - pd->dst.stride, bw, bh); - } else { - sse = aom_sse(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, bw, - bh); - } - sse = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2); - - model_rd_from_sse(cpi, x, plane_bsize, plane, sse, bw * bh, &rate, &dist); - - if (plane == 0) x->pred_sse[ref] = (unsigned int)AOMMIN(sse, UINT_MAX); - - total_sse += sse; - rate_sum += rate; - dist_sum += dist; - if (plane_rate) plane_rate[plane] = rate; - if (plane_sse) plane_sse[plane] = sse; - if (plane_dist) plane_dist[plane] = dist; - assert(rate_sum >= 0); - } - - if (skip_txfm_sb) *skip_txfm_sb = total_sse == 0; - if (skip_sse_sb) *skip_sse_sb = total_sse << 4; - rate_sum = AOMMIN(rate_sum, INT_MAX); - *out_rate_sum = (int)rate_sum; - *out_dist_sum = dist_sum; -} - -static void check_block_skip(const AV1_COMP *const cpi, BLOCK_SIZE bsize, - MACROBLOCK *x, MACROBLOCKD *xd, int plane_from, - int plane_to, int *skip_txfm_sb) { - *skip_txfm_sb = 1; - for (int plane = plane_from; plane <= plane_to; ++plane) { - struct macroblock_plane *const p = &x->plane[plane]; - struct macroblockd_plane *const pd = &xd->plane[plane]; - const BLOCK_SIZE bs = - get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); - unsigned int sse; - - if (x->skip_chroma_rd && 
plane) continue; - - // Since fast HBD variance functions scale down sse by 4 bit, we first use - // fast vf implementation to rule out blocks with non-zero scaled sse. Then, - // only if the source is HBD and the scaled sse is 0, accurate sse - // computation is applied to determine if the sse is really 0. This step is - // necessary for HBD lossless coding. - cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, - &sse); - if (sse) { - *skip_txfm_sb = 0; - return; - } else if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - uint64_t sse64 = aom_highbd_sse_odd_size( - p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, - block_size_wide[bs], block_size_high[bs]); - - if (sse64) { - *skip_txfm_sb = 0; - return; - } - } - } - return; -} - -int64_t av1_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, - intptr_t block_size, int64_t *ssz) { - int i; - int64_t error = 0, sqcoeff = 0; - - for (i = 0; i < block_size; i++) { - const int diff = coeff[i] - dqcoeff[i]; - error += diff * diff; - sqcoeff += coeff[i] * coeff[i]; - } - - *ssz = sqcoeff; - return error; -} - -int64_t av1_highbd_block_error_c(const tran_low_t *coeff, - const tran_low_t *dqcoeff, intptr_t block_size, - int64_t *ssz, int bd) { - int i; - int64_t error = 0, sqcoeff = 0; - int shift = 2 * (bd - 8); - int rounding = shift > 0 ? 1 << (shift - 1) : 0; - - for (i = 0; i < block_size; i++) { - const int64_t diff = coeff[i] - dqcoeff[i]; - error += diff * diff; - sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i]; - } - assert(error >= 0 && sqcoeff >= 0); - error = (error + rounding) >> shift; - sqcoeff = (sqcoeff + rounding) >> shift; - - *ssz = sqcoeff; - return error; -} - -// Get transform block visible dimensions cropped to the MI units. 
-static void get_txb_dimensions(const MACROBLOCKD *xd, int plane, - BLOCK_SIZE plane_bsize, int blk_row, int blk_col, - BLOCK_SIZE tx_bsize, int *width, int *height, - int *visible_width, int *visible_height) { - assert(tx_bsize <= plane_bsize); - int txb_height = block_size_high[tx_bsize]; - int txb_width = block_size_wide[tx_bsize]; - const int block_height = block_size_high[plane_bsize]; - const int block_width = block_size_wide[plane_bsize]; - const struct macroblockd_plane *const pd = &xd->plane[plane]; - // TODO(aconverse@google.com): Investigate using crop_width/height here rather - // than the MI size - const int block_rows = - (xd->mb_to_bottom_edge >= 0) - ? block_height - : (xd->mb_to_bottom_edge >> (3 + pd->subsampling_y)) + block_height; - const int block_cols = - (xd->mb_to_right_edge >= 0) - ? block_width - : (xd->mb_to_right_edge >> (3 + pd->subsampling_x)) + block_width; - const int tx_unit_size = tx_size_wide_log2[0]; - if (width) *width = txb_width; - if (height) *height = txb_height; - *visible_width = clamp(block_cols - (blk_col << tx_unit_size), 0, txb_width); - *visible_height = - clamp(block_rows - (blk_row << tx_unit_size), 0, txb_height); -} - -// Compute the pixel domain distortion from src and dst on all visible 4x4s in -// the -// transform block. 
-static unsigned pixel_dist(const AV1_COMP *const cpi, const MACROBLOCK *x, - int plane, const uint8_t *src, const int src_stride, - const uint8_t *dst, const int dst_stride, - int blk_row, int blk_col, - const BLOCK_SIZE plane_bsize, - const BLOCK_SIZE tx_bsize) { - int txb_rows, txb_cols, visible_rows, visible_cols; - const MACROBLOCKD *xd = &x->e_mbd; - - get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize, - &txb_cols, &txb_rows, &visible_cols, &visible_rows); - assert(visible_rows > 0); - assert(visible_cols > 0); - -#if CONFIG_DIST_8X8 - if (x->using_dist_8x8 && plane == 0) - return (unsigned)av1_dist_8x8(cpi, x, src, src_stride, dst, dst_stride, - tx_bsize, txb_cols, txb_rows, visible_cols, - visible_rows, x->qindex); -#endif // CONFIG_DIST_8X8 - - unsigned sse = pixel_dist_visible_only(cpi, x, src, src_stride, dst, - dst_stride, tx_bsize, txb_rows, - txb_cols, visible_rows, visible_cols); - - return sse; -} - -// Compute the pixel domain distortion from diff on all visible 4x4s in the -// transform block. 
-static INLINE int64_t pixel_diff_dist(const MACROBLOCK *x, int plane, - int blk_row, int blk_col, - const BLOCK_SIZE plane_bsize, - const BLOCK_SIZE tx_bsize) { - int visible_rows, visible_cols; - const MACROBLOCKD *xd = &x->e_mbd; - get_txb_dimensions(xd, plane, plane_bsize, blk_row, blk_col, tx_bsize, NULL, - NULL, &visible_cols, &visible_rows); - const int diff_stride = block_size_wide[plane_bsize]; - const int16_t *diff = x->plane[plane].src_diff; -#if CONFIG_DIST_8X8 - int txb_height = block_size_high[tx_bsize]; - int txb_width = block_size_wide[tx_bsize]; - if (x->using_dist_8x8 && plane == 0) { - const int src_stride = x->plane[plane].src.stride; - const int src_idx = (blk_row * src_stride + blk_col) - << tx_size_wide_log2[0]; - const int diff_idx = (blk_row * diff_stride + blk_col) - << tx_size_wide_log2[0]; - const uint8_t *src = &x->plane[plane].src.buf[src_idx]; - return dist_8x8_diff(x, src, src_stride, diff + diff_idx, diff_stride, - txb_width, txb_height, visible_cols, visible_rows, - x->qindex); - } -#endif - diff += ((blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]); - return aom_sum_squares_2d_i16(diff, diff_stride, visible_cols, visible_rows); -} - -int av1_count_colors(const uint8_t *src, int stride, int rows, int cols, - int *val_count) { - const int max_pix_val = 1 << 8; - memset(val_count, 0, max_pix_val * sizeof(val_count[0])); - for (int r = 0; r < rows; ++r) { - for (int c = 0; c < cols; ++c) { - const int this_val = src[r * stride + c]; - assert(this_val < max_pix_val); - ++val_count[this_val]; - } - } - int n = 0; - for (int i = 0; i < max_pix_val; ++i) { - if (val_count[i]) ++n; - } - return n; -} - -int av1_count_colors_highbd(const uint8_t *src8, int stride, int rows, int cols, - int bit_depth, int *val_count) { - assert(bit_depth <= 12); - const int max_pix_val = 1 << bit_depth; - const uint16_t *src = CONVERT_TO_SHORTPTR(src8); - memset(val_count, 0, max_pix_val * sizeof(val_count[0])); - for (int r = 0; r < rows; ++r) { - 
for (int c = 0; c < cols; ++c) { - const int this_val = src[r * stride + c]; - assert(this_val < max_pix_val); - if (this_val >= max_pix_val) return 0; - ++val_count[this_val]; - } - } - int n = 0; - for (int i = 0; i < max_pix_val; ++i) { - if (val_count[i]) ++n; - } - return n; -} - -static void inverse_transform_block_facade(MACROBLOCKD *xd, int plane, - int block, int blk_row, int blk_col, - int eob, int reduced_tx_set) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); - const PLANE_TYPE plane_type = get_plane_type(plane); - const TX_SIZE tx_size = av1_get_tx_size(plane, xd); - const TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col, - tx_size, reduced_tx_set); - const int dst_stride = pd->dst.stride; - uint8_t *dst = - &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]]; - av1_inverse_transform_block(xd, dqcoeff, plane, tx_type, tx_size, dst, - dst_stride, eob, reduced_tx_set); -} - -static int find_tx_size_rd_info(TXB_RD_RECORD *cur_record, const uint32_t hash); - -static uint32_t get_intra_txb_hash(MACROBLOCK *x, int plane, int blk_row, - int blk_col, BLOCK_SIZE plane_bsize, - TX_SIZE tx_size) { - int16_t tmp_data[64 * 64]; - const int diff_stride = block_size_wide[plane_bsize]; - const int16_t *diff = x->plane[plane].src_diff; - const int16_t *cur_diff_row = diff + 4 * blk_row * diff_stride + 4 * blk_col; - const int txb_w = tx_size_wide[tx_size]; - const int txb_h = tx_size_high[tx_size]; - uint8_t *hash_data = (uint8_t *)cur_diff_row; - if (txb_w != diff_stride) { - int16_t *cur_hash_row = tmp_data; - for (int i = 0; i < txb_h; i++) { - memcpy(cur_hash_row, cur_diff_row, sizeof(*diff) * txb_w); - cur_hash_row += txb_w; - cur_diff_row += diff_stride; - } - hash_data = (uint8_t *)tmp_data; - } - CRC32C *crc = &x->mb_rd_record.crc_calculator; - const uint32_t hash = av1_get_crc32c_value(crc, hash_data, 2 * txb_w * txb_h); - return (hash << 5) + tx_size; -} 
- -static INLINE void dist_block_tx_domain(MACROBLOCK *x, int plane, int block, - TX_SIZE tx_size, int64_t *out_dist, - int64_t *out_sse) { - MACROBLOCKD *const xd = &x->e_mbd; - const struct macroblock_plane *const p = &x->plane[plane]; - const struct macroblockd_plane *const pd = &xd->plane[plane]; - // Transform domain distortion computation is more efficient as it does - // not involve an inverse transform, but it is less accurate. - const int buffer_length = av1_get_max_eob(tx_size); - int64_t this_sse; - // TX-domain results need to shift down to Q2/D10 to match pixel - // domain distortion values which are in Q2^2 - int shift = (MAX_TX_SCALE - av1_get_tx_scale(tx_size)) * 2; - tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); - tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); - - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) - *out_dist = av1_highbd_block_error(coeff, dqcoeff, buffer_length, &this_sse, - xd->bd); - else - *out_dist = av1_block_error(coeff, dqcoeff, buffer_length, &this_sse); - - *out_dist = RIGHT_SIGNED_SHIFT(*out_dist, shift); - *out_sse = RIGHT_SIGNED_SHIFT(this_sse, shift); -} - -static INLINE int64_t dist_block_px_domain(const AV1_COMP *cpi, MACROBLOCK *x, - int plane, BLOCK_SIZE plane_bsize, - int block, int blk_row, int blk_col, - TX_SIZE tx_size) { - MACROBLOCKD *const xd = &x->e_mbd; - const struct macroblock_plane *const p = &x->plane[plane]; - const struct macroblockd_plane *const pd = &xd->plane[plane]; - const uint16_t eob = p->eobs[block]; - const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size]; - const int bsw = block_size_wide[tx_bsize]; - const int bsh = block_size_high[tx_bsize]; - const int src_stride = x->plane[plane].src.stride; - const int dst_stride = xd->plane[plane].dst.stride; - // Scale the transform block index to pixel unit. 
- const int src_idx = (blk_row * src_stride + blk_col) << tx_size_wide_log2[0]; - const int dst_idx = (blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]; - const uint8_t *src = &x->plane[plane].src.buf[src_idx]; - const uint8_t *dst = &xd->plane[plane].dst.buf[dst_idx]; - const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); - - assert(cpi != NULL); - assert(tx_size_wide_log2[0] == tx_size_high_log2[0]); - - uint8_t *recon; - DECLARE_ALIGNED(16, uint16_t, recon16[MAX_TX_SQUARE]); - - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - recon = CONVERT_TO_BYTEPTR(recon16); - av1_highbd_convolve_2d_copy_sr(CONVERT_TO_SHORTPTR(dst), dst_stride, - CONVERT_TO_SHORTPTR(recon), MAX_TX_SIZE, bsw, - bsh, NULL, NULL, 0, 0, NULL, xd->bd); - } else { - recon = (uint8_t *)recon16; - av1_convolve_2d_copy_sr(dst, dst_stride, recon, MAX_TX_SIZE, bsw, bsh, NULL, - NULL, 0, 0, NULL); - } - - const PLANE_TYPE plane_type = get_plane_type(plane); - TX_TYPE tx_type = av1_get_tx_type(plane_type, xd, blk_row, blk_col, tx_size, - cpi->common.reduced_tx_set_used); - av1_inverse_transform_block(xd, dqcoeff, plane, tx_type, tx_size, recon, - MAX_TX_SIZE, eob, - cpi->common.reduced_tx_set_used); - - return 16 * pixel_dist(cpi, x, plane, src, src_stride, recon, MAX_TX_SIZE, - blk_row, blk_col, plane_bsize, tx_bsize); -} - -static double get_mean(const int16_t *diff, int stride, int w, int h) { - double sum = 0.0; - for (int j = 0; j < h; ++j) { - for (int i = 0; i < w; ++i) { - sum += diff[j * stride + i]; - } - } - assert(w > 0 && h > 0); - return sum / (w * h); -} - -static double get_sse_norm(const int16_t *diff, int stride, int w, int h) { - double sum = 0.0; - for (int j = 0; j < h; ++j) { - for (int i = 0; i < w; ++i) { - const int err = diff[j * stride + i]; - sum += err * err; - } - } - assert(w > 0 && h > 0); - return sum / (w * h); -} - -static double get_sad_norm(const int16_t *diff, int stride, int w, int h) { - double sum = 0.0; - for (int j = 0; j < h; ++j) { - for 
(int i = 0; i < w; ++i) { - sum += abs(diff[j * stride + i]); - } - } - assert(w > 0 && h > 0); - return sum / (w * h); -} - -static void get_2x2_normalized_sses_and_sads( - const AV1_COMP *const cpi, BLOCK_SIZE tx_bsize, const uint8_t *const src, - int src_stride, const uint8_t *const dst, int dst_stride, - const int16_t *const src_diff, int diff_stride, double *const sse_norm_arr, - double *const sad_norm_arr) { - const BLOCK_SIZE tx_bsize_half = - get_partition_subsize(tx_bsize, PARTITION_SPLIT); - if (tx_bsize_half == BLOCK_INVALID) { // manually calculate stats - const int half_width = block_size_wide[tx_bsize] / 2; - const int half_height = block_size_high[tx_bsize] / 2; - for (int row = 0; row < 2; ++row) { - for (int col = 0; col < 2; ++col) { - const int16_t *const this_src_diff = - src_diff + row * half_height * diff_stride + col * half_width; - if (sse_norm_arr) { - sse_norm_arr[row * 2 + col] = - get_sse_norm(this_src_diff, diff_stride, half_width, half_height); - } - if (sad_norm_arr) { - sad_norm_arr[row * 2 + col] = - get_sad_norm(this_src_diff, diff_stride, half_width, half_height); - } - } - } - } else { // use function pointers to calculate stats - const int half_width = block_size_wide[tx_bsize_half]; - const int half_height = block_size_high[tx_bsize_half]; - const int num_samples_half = half_width * half_height; - for (int row = 0; row < 2; ++row) { - for (int col = 0; col < 2; ++col) { - const uint8_t *const this_src = - src + row * half_height * src_stride + col * half_width; - const uint8_t *const this_dst = - dst + row * half_height * dst_stride + col * half_width; - - if (sse_norm_arr) { - unsigned int this_sse; - cpi->fn_ptr[tx_bsize_half].vf(this_src, src_stride, this_dst, - dst_stride, &this_sse); - sse_norm_arr[row * 2 + col] = (double)this_sse / num_samples_half; - } - - if (sad_norm_arr) { - const unsigned int this_sad = cpi->fn_ptr[tx_bsize_half].sdf( - this_src, src_stride, this_dst, dst_stride); - sad_norm_arr[row * 2 + col] = 
(double)this_sad / num_samples_half; - } - } - } - } -} - -// NOTE: CONFIG_COLLECT_RD_STATS has 3 possible values -// 0: Do not collect any RD stats -// 1: Collect RD stats for transform units -// 2: Collect RD stats for partition units -#if CONFIG_COLLECT_RD_STATS - -#if CONFIG_COLLECT_RD_STATS == 1 -static void PrintTransformUnitStats(const AV1_COMP *const cpi, MACROBLOCK *x, - const RD_STATS *const rd_stats, int blk_row, - int blk_col, BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, TX_TYPE tx_type, - int64_t rd) { - if (rd_stats->rate == INT_MAX || rd_stats->dist == INT64_MAX) return; - - // Generate small sample to restrict output size. - static unsigned int seed = 21743; - if (lcg_rand16(&seed) % 256 > 0) return; - - const char output_file[] = "tu_stats.txt"; - FILE *fout = fopen(output_file, "a"); - if (!fout) return; - - const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size]; - const MACROBLOCKD *const xd = &x->e_mbd; - const int plane = 0; - struct macroblock_plane *const p = &x->plane[plane]; - const struct macroblockd_plane *const pd = &xd->plane[plane]; - const int txw = tx_size_wide[tx_size]; - const int txh = tx_size_high[tx_size]; - const int dequant_shift = - (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 
xd->bd - 5 : 3; - const int q_step = pd->dequant_Q3[1] >> dequant_shift; - const double num_samples = txw * txh; - - const double rate_norm = (double)rd_stats->rate / num_samples; - const double dist_norm = (double)rd_stats->dist / num_samples; - - fprintf(fout, "%g %g", rate_norm, dist_norm); - - const int src_stride = p->src.stride; - const uint8_t *const src = - &p->src.buf[(blk_row * src_stride + blk_col) << tx_size_wide_log2[0]]; - const int dst_stride = pd->dst.stride; - const uint8_t *const dst = - &pd->dst.buf[(blk_row * dst_stride + blk_col) << tx_size_wide_log2[0]]; - unsigned int sse; - cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse); - const double sse_norm = (double)sse / num_samples; - - const unsigned int sad = - cpi->fn_ptr[tx_bsize].sdf(src, src_stride, dst, dst_stride); - const double sad_norm = (double)sad / num_samples; - - fprintf(fout, " %g %g", sse_norm, sad_norm); - - const int diff_stride = block_size_wide[plane_bsize]; - const int16_t *const src_diff = - &p->src_diff[(blk_row * diff_stride + blk_col) << tx_size_wide_log2[0]]; - - double sse_norm_arr[4], sad_norm_arr[4]; - get_2x2_normalized_sses_and_sads(cpi, tx_bsize, src, src_stride, dst, - dst_stride, src_diff, diff_stride, - sse_norm_arr, sad_norm_arr); - for (int i = 0; i < 4; ++i) { - fprintf(fout, " %g", sse_norm_arr[i]); - } - for (int i = 0; i < 4; ++i) { - fprintf(fout, " %g", sad_norm_arr[i]); - } - - const TX_TYPE_1D tx_type_1d_row = htx_tab[tx_type]; - const TX_TYPE_1D tx_type_1d_col = vtx_tab[tx_type]; - - fprintf(fout, " %d %d %d %d %d", q_step, tx_size_wide[tx_size], - tx_size_high[tx_size], tx_type_1d_row, tx_type_1d_col); - - int model_rate; - int64_t model_dist; - model_rd_sse_fn[MODELRD_CURVFIT](cpi, x, tx_bsize, plane, sse, num_samples, - &model_rate, &model_dist); - const double model_rate_norm = (double)model_rate / num_samples; - const double model_dist_norm = (double)model_dist / num_samples; - fprintf(fout, " %g %g", model_rate_norm, 
model_dist_norm); - - const double mean = get_mean(src_diff, diff_stride, txw, txh); - double hor_corr, vert_corr; - get_horver_correlation(src_diff, diff_stride, txw, txh, &hor_corr, - &vert_corr); - fprintf(fout, " %g %g %g", mean, hor_corr, vert_corr); - - double hdist[4] = { 0 }, vdist[4] = { 0 }; - get_energy_distribution_fine(cpi, tx_bsize, src, src_stride, dst, dst_stride, - 1, hdist, vdist); - fprintf(fout, " %g %g %g %g %g %g %g %g", hdist[0], hdist[1], hdist[2], - hdist[3], vdist[0], vdist[1], vdist[2], vdist[3]); - - fprintf(fout, " %d %" PRId64, x->rdmult, rd); - - fprintf(fout, "\n"); - fclose(fout); -} -#endif // CONFIG_COLLECT_RD_STATS == 1 - -#if CONFIG_COLLECT_RD_STATS >= 2 -static void PrintPredictionUnitStats(const AV1_COMP *const cpi, MACROBLOCK *x, - const RD_STATS *const rd_stats, - BLOCK_SIZE plane_bsize) { - if (rd_stats->invalid_rate) return; - if (rd_stats->rate == INT_MAX || rd_stats->dist == INT64_MAX) return; - - // Generate small sample to restrict output size. - static unsigned int seed = 95014; - if (lcg_rand16(&seed) % 256 > 0) return; - - const char output_file[] = "pu_stats.txt"; - FILE *fout = fopen(output_file, "a"); - if (!fout) return; - - const MACROBLOCKD *const xd = &x->e_mbd; - const int plane = 0; - struct macroblock_plane *const p = &x->plane[plane]; - const struct macroblockd_plane *const pd = &xd->plane[plane]; - const int diff_stride = block_size_wide[plane_bsize]; - int bw, bh; - get_txb_dimensions(xd, plane, plane_bsize, 0, 0, plane_bsize, NULL, NULL, &bw, - &bh); - const int num_samples = bw * bh; - const int dequant_shift = - (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 
xd->bd - 5 : 3; - const int q_step = pd->dequant_Q3[1] >> dequant_shift; - - const double rate_norm = (double)rd_stats->rate / num_samples; - const double dist_norm = (double)rd_stats->dist / num_samples; - const double rdcost_norm = - (double)RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) / num_samples; - - fprintf(fout, "%g %g %g", rate_norm, dist_norm, rdcost_norm); - - const int src_stride = p->src.stride; - const uint8_t *const src = p->src.buf; - const int dst_stride = pd->dst.stride; - const uint8_t *const dst = pd->dst.buf; - const int16_t *const src_diff = p->src_diff; - const int shift = (xd->bd - 8); - - int64_t sse = aom_sum_squares_2d_i16(src_diff, diff_stride, bw, bh); - sse = ROUND_POWER_OF_TWO(sse, shift * 2); - const double sse_norm = (double)sse / num_samples; - - const unsigned int sad = - cpi->fn_ptr[plane_bsize].sdf(src, src_stride, dst, dst_stride); - const double sad_norm = - (double)sad / (1 << num_pels_log2_lookup[plane_bsize]); - - fprintf(fout, " %g %g", sse_norm, sad_norm); - - double sse_norm_arr[4], sad_norm_arr[4]; - get_2x2_normalized_sses_and_sads(cpi, plane_bsize, src, src_stride, dst, - dst_stride, src_diff, diff_stride, - sse_norm_arr, sad_norm_arr); - if (shift) { - for (int k = 0; k < 4; ++k) sse_norm_arr[k] /= (1 << (2 * shift)); - for (int k = 0; k < 4; ++k) sad_norm_arr[k] /= (1 << shift); - } - for (int i = 0; i < 4; ++i) { - fprintf(fout, " %g", sse_norm_arr[i]); - } - for (int i = 0; i < 4; ++i) { - fprintf(fout, " %g", sad_norm_arr[i]); - } - - fprintf(fout, " %d %d %d %d", q_step, x->rdmult, bw, bh); - - int model_rate; - int64_t model_dist; - model_rd_sse_fn[MODELRD_CURVFIT](cpi, x, plane_bsize, plane, sse, num_samples, - &model_rate, &model_dist); - const double model_rdcost_norm = - (double)RDCOST(x->rdmult, model_rate, model_dist) / num_samples; - const double model_rate_norm = (double)model_rate / num_samples; - const double model_dist_norm = (double)model_dist / num_samples; - fprintf(fout, " %g %g %g", 
model_rate_norm, model_dist_norm, - model_rdcost_norm); - - double mean = get_mean(src_diff, diff_stride, bw, bh); - mean /= (1 << shift); - double hor_corr, vert_corr; - get_horver_correlation(src_diff, diff_stride, bw, bh, &hor_corr, &vert_corr); - fprintf(fout, " %g %g %g", mean, hor_corr, vert_corr); - - double hdist[4] = { 0 }, vdist[4] = { 0 }; - get_energy_distribution_fine(cpi, plane_bsize, src, src_stride, dst, - dst_stride, 1, hdist, vdist); - fprintf(fout, " %g %g %g %g %g %g %g %g", hdist[0], hdist[1], hdist[2], - hdist[3], vdist[0], vdist[1], vdist[2], vdist[3]); - - fprintf(fout, "\n"); - fclose(fout); -} -#endif // CONFIG_COLLECT_RD_STATS >= 2 -#endif // CONFIG_COLLECT_RD_STATS - -static void model_rd_with_dnn(const AV1_COMP *const cpi, - const MACROBLOCK *const x, BLOCK_SIZE plane_bsize, - int plane, int64_t sse, int num_samples, - int *rate, int64_t *dist) { - const MACROBLOCKD *const xd = &x->e_mbd; - const struct macroblockd_plane *const pd = &xd->plane[plane]; - const int log_numpels = num_pels_log2_lookup[plane_bsize]; - - const int dequant_shift = - (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 
xd->bd - 5 : 3; - const int q_step = AOMMAX(pd->dequant_Q3[1] >> dequant_shift, 1); - - const struct macroblock_plane *const p = &x->plane[plane]; - int bw, bh; - get_txb_dimensions(xd, plane, plane_bsize, 0, 0, plane_bsize, NULL, NULL, &bw, - &bh); - const int src_stride = p->src.stride; - const uint8_t *const src = p->src.buf; - const int dst_stride = pd->dst.stride; - const uint8_t *const dst = pd->dst.buf; - const int16_t *const src_diff = p->src_diff; - const int diff_stride = block_size_wide[plane_bsize]; - const int shift = (xd->bd - 8); - - if (sse == 0) { - if (rate) *rate = 0; - if (dist) *dist = 0; - return; - } - if (plane) { - int model_rate; - int64_t model_dist; - model_rd_with_curvfit(cpi, x, plane_bsize, plane, sse, num_samples, - &model_rate, &model_dist); - if (rate) *rate = model_rate; - if (dist) *dist = model_dist; - return; - } - - aom_clear_system_state(); - const double sse_norm = (double)sse / num_samples; - - double sse_norm_arr[4]; - get_2x2_normalized_sses_and_sads(cpi, plane_bsize, src, src_stride, dst, - dst_stride, src_diff, diff_stride, - sse_norm_arr, NULL); - double mean = get_mean(src_diff, bw, bw, bh); - if (shift) { - for (int k = 0; k < 4; ++k) sse_norm_arr[k] /= (1 << (2 * shift)); - mean /= (1 << shift); - } - double sse_norm_sum = 0.0, sse_frac_arr[3]; - for (int k = 0; k < 4; ++k) sse_norm_sum += sse_norm_arr[k]; - for (int k = 0; k < 3; ++k) - sse_frac_arr[k] = - sse_norm_sum > 0.0 ? 
sse_norm_arr[k] / sse_norm_sum : 0.25; - const double q_sqr = (double)(q_step * q_step); - const double q_sqr_by_sse_norm = q_sqr / (sse_norm + 1.0); - const double mean_sqr_by_sse_norm = mean * mean / (sse_norm + 1.0); - double hor_corr, vert_corr; - get_horver_correlation(src_diff, diff_stride, bw, bh, &hor_corr, &vert_corr); - - float features[NUM_FEATURES_PUSTATS]; - features[0] = (float)hor_corr; - features[1] = (float)log_numpels; - features[2] = (float)mean_sqr_by_sse_norm; - features[3] = (float)q_sqr_by_sse_norm; - features[4] = (float)sse_frac_arr[0]; - features[5] = (float)sse_frac_arr[1]; - features[6] = (float)sse_frac_arr[2]; - features[7] = (float)vert_corr; - - float rate_f, dist_by_sse_norm_f; - av1_nn_predict(features, &av1_pustats_dist_nnconfig, &dist_by_sse_norm_f); - av1_nn_predict(features, &av1_pustats_rate_nnconfig, &rate_f); - const float dist_f = (float)((double)dist_by_sse_norm_f * (1.0 + sse_norm)); - int rate_i = (int)(AOMMAX(0.0, rate_f * num_samples) + 0.5); - int64_t dist_i = (int64_t)(AOMMAX(0.0, dist_f * num_samples) + 0.5); - aom_clear_system_state(); - - // Check if skip is better - if (rate_i == 0) { - dist_i = sse << 4; - } else if (RDCOST(x->rdmult, rate_i, dist_i) >= - RDCOST(x->rdmult, 0, sse << 4)) { - rate_i = 0; - dist_i = sse << 4; - } - - if (rate) *rate = rate_i; - if (dist) *dist = dist_i; - return; -} - -static void model_rd_for_sb_with_dnn( - const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd, - int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum, - int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb, - int *plane_rate, int64_t *plane_sse, int64_t *plane_dist) { - (void)mi_row; - (void)mi_col; - // Note our transform coeffs are 8 times an orthogonal transform. - // Hence quantizer step is also 8 times. To get effective quantizer - // we need to divide by 8 before sending to modeling function. 
- const int ref = xd->mi[0]->ref_frame[0]; - - int64_t rate_sum = 0; - int64_t dist_sum = 0; - int64_t total_sse = 0; - - for (int plane = plane_from; plane <= plane_to; ++plane) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - const BLOCK_SIZE plane_bsize = - get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); - int64_t dist, sse; - int rate; - - if (x->skip_chroma_rd && plane) continue; - - const struct macroblock_plane *const p = &x->plane[plane]; - const int shift = (xd->bd - 8); - int bw, bh; - get_txb_dimensions(xd, plane, plane_bsize, 0, 0, plane_bsize, NULL, NULL, - &bw, &bh); - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - sse = aom_highbd_sse(p->src.buf, p->src.stride, pd->dst.buf, - pd->dst.stride, bw, bh); - } else { - sse = aom_sse(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, bw, - bh); - } - sse = ROUND_POWER_OF_TWO(sse, shift * 2); - - model_rd_with_dnn(cpi, x, plane_bsize, plane, sse, bw * bh, &rate, &dist); - - if (plane == 0) x->pred_sse[ref] = (unsigned int)AOMMIN(sse, UINT_MAX); - - total_sse += sse; - rate_sum += rate; - dist_sum += dist; - - if (plane_rate) plane_rate[plane] = rate; - if (plane_sse) plane_sse[plane] = sse; - if (plane_dist) plane_dist[plane] = dist; - } - - if (skip_txfm_sb) *skip_txfm_sb = total_sse == 0; - if (skip_sse_sb) *skip_sse_sb = total_sse << 4; - *out_rate_sum = (int)rate_sum; - *out_dist_sum = dist_sum; -} - -// Fits a surface for rate and distortion using as features: -// log2(sse_norm + 1) and log2(sse_norm/qstep^2) -static void model_rd_with_surffit(const AV1_COMP *const cpi, - const MACROBLOCK *const x, - BLOCK_SIZE plane_bsize, int plane, - int64_t sse, int num_samples, int *rate, - int64_t *dist) { - (void)cpi; - (void)plane_bsize; - const MACROBLOCKD *const xd = &x->e_mbd; - const struct macroblockd_plane *const pd = &xd->plane[plane]; - const int dequant_shift = - (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 
xd->bd - 5 : 3; - const int qstep = AOMMAX(pd->dequant_Q3[1] >> dequant_shift, 1); - if (sse == 0) { - if (rate) *rate = 0; - if (dist) *dist = 0; - return; - } - aom_clear_system_state(); - const double sse_norm = (double)sse / num_samples; - const double qstepsqr = (double)qstep * qstep; - const double xm = log(sse_norm + 1.0) / log(2.0); - const double yl = log(sse_norm / qstepsqr) / log(2.0); - double rate_f, dist_by_sse_norm_f; - - av1_model_rd_surffit(xm, yl, &rate_f, &dist_by_sse_norm_f); - - const double dist_f = dist_by_sse_norm_f * sse_norm; - int rate_i = (int)(AOMMAX(0.0, rate_f * num_samples) + 0.5); - int64_t dist_i = (int64_t)(AOMMAX(0.0, dist_f * num_samples) + 0.5); - aom_clear_system_state(); - - // Check if skip is better - if (rate_i == 0) { - dist_i = sse << 4; - } else if (RDCOST(x->rdmult, rate_i, dist_i) >= - RDCOST(x->rdmult, 0, sse << 4)) { - rate_i = 0; - dist_i = sse << 4; - } - - if (rate) *rate = rate_i; - if (dist) *dist = dist_i; -} - -static void model_rd_for_sb_with_surffit( - const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd, - int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum, - int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb, - int *plane_rate, int64_t *plane_sse, int64_t *plane_dist) { - (void)mi_row; - (void)mi_col; - // Note our transform coeffs are 8 times an orthogonal transform. - // Hence quantizer step is also 8 times. To get effective quantizer - // we need to divide by 8 before sending to modeling function. 
- const int ref = xd->mi[0]->ref_frame[0]; - - int64_t rate_sum = 0; - int64_t dist_sum = 0; - int64_t total_sse = 0; - - for (int plane = plane_from; plane <= plane_to; ++plane) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - const BLOCK_SIZE plane_bsize = - get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); - int64_t dist, sse; - int rate; - - if (x->skip_chroma_rd && plane) continue; - - int bw, bh; - const struct macroblock_plane *const p = &x->plane[plane]; - const int shift = (xd->bd - 8); - get_txb_dimensions(xd, plane, plane_bsize, 0, 0, plane_bsize, NULL, NULL, - &bw, &bh); - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - sse = aom_highbd_sse(p->src.buf, p->src.stride, pd->dst.buf, - pd->dst.stride, bw, bh); - } else { - sse = aom_sse(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, bw, - bh); - } - sse = ROUND_POWER_OF_TWO(sse, shift * 2); - - model_rd_with_surffit(cpi, x, plane_bsize, plane, sse, bw * bh, &rate, - &dist); - - if (plane == 0) x->pred_sse[ref] = (unsigned int)AOMMIN(sse, UINT_MAX); - - total_sse += sse; - rate_sum += rate; - dist_sum += dist; - - if (plane_rate) plane_rate[plane] = rate; - if (plane_sse) plane_sse[plane] = sse; - if (plane_dist) plane_dist[plane] = dist; - } - - if (skip_txfm_sb) *skip_txfm_sb = total_sse == 0; - if (skip_sse_sb) *skip_sse_sb = total_sse << 4; - *out_rate_sum = (int)rate_sum; - *out_dist_sum = dist_sum; -} - -// Fits a curve for rate and distortion using as feature: -// log2(sse_norm/qstep^2) -static void model_rd_with_curvfit(const AV1_COMP *const cpi, - const MACROBLOCK *const x, - BLOCK_SIZE plane_bsize, int plane, - int64_t sse, int num_samples, int *rate, - int64_t *dist) { - (void)cpi; - (void)plane_bsize; - const MACROBLOCKD *const xd = &x->e_mbd; - const struct macroblockd_plane *const pd = &xd->plane[plane]; - const int dequant_shift = - (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 
xd->bd - 5 : 3; - const int qstep = AOMMAX(pd->dequant_Q3[1] >> dequant_shift, 1); - - if (sse == 0) { - if (rate) *rate = 0; - if (dist) *dist = 0; - return; - } - aom_clear_system_state(); - const double sse_norm = (double)sse / num_samples; - const double qstepsqr = (double)qstep * qstep; - const double xqr = log(sse_norm / qstepsqr) / log(2.0); - - double rate_f, dist_by_sse_norm_f; - av1_model_rd_curvfit(xqr, &rate_f, &dist_by_sse_norm_f); - - const double dist_f = dist_by_sse_norm_f * sse_norm; - int rate_i = (int)(AOMMAX(0.0, rate_f * num_samples) + 0.5); - int64_t dist_i = (int64_t)(AOMMAX(0.0, dist_f * num_samples) + 0.5); - aom_clear_system_state(); - - // Check if skip is better - if (rate_i == 0) { - dist_i = sse << 4; - } else if (RDCOST(x->rdmult, rate_i, dist_i) >= - RDCOST(x->rdmult, 0, sse << 4)) { - rate_i = 0; - dist_i = sse << 4; - } - - if (rate) *rate = rate_i; - if (dist) *dist = dist_i; -} - -static void model_rd_for_sb_with_curvfit( - const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd, - int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum, - int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb, - int *plane_rate, int64_t *plane_sse, int64_t *plane_dist) { - (void)mi_row; - (void)mi_col; - // Note our transform coeffs are 8 times an orthogonal transform. - // Hence quantizer step is also 8 times. To get effective quantizer - // we need to divide by 8 before sending to modeling function. 
- const int ref = xd->mi[0]->ref_frame[0]; - - int64_t rate_sum = 0; - int64_t dist_sum = 0; - int64_t total_sse = 0; - - for (int plane = plane_from; plane <= plane_to; ++plane) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - const BLOCK_SIZE plane_bsize = - get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); - int64_t dist, sse; - int rate; - - if (x->skip_chroma_rd && plane) continue; - - int bw, bh; - const struct macroblock_plane *const p = &x->plane[plane]; - const int shift = (xd->bd - 8); - get_txb_dimensions(xd, plane, plane_bsize, 0, 0, plane_bsize, NULL, NULL, - &bw, &bh); - - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - sse = aom_highbd_sse(p->src.buf, p->src.stride, pd->dst.buf, - pd->dst.stride, bw, bh); - } else { - sse = aom_sse(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, bw, - bh); - } - - sse = ROUND_POWER_OF_TWO(sse, shift * 2); - model_rd_with_curvfit(cpi, x, plane_bsize, plane, sse, bw * bh, &rate, - &dist); - - if (plane == 0) x->pred_sse[ref] = (unsigned int)AOMMIN(sse, UINT_MAX); - - total_sse += sse; - rate_sum += rate; - dist_sum += dist; - - if (plane_rate) plane_rate[plane] = rate; - if (plane_sse) plane_sse[plane] = sse; - if (plane_dist) plane_dist[plane] = dist; - } - - if (skip_txfm_sb) *skip_txfm_sb = total_sse == 0; - if (skip_sse_sb) *skip_sse_sb = total_sse << 4; - *out_rate_sum = (int)rate_sum; - *out_dist_sum = dist_sum; -} - -static void model_rd_for_sb_with_fullrdy( - const AV1_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd, - int plane_from, int plane_to, int mi_row, int mi_col, int *out_rate_sum, - int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb, - int *plane_rate, int64_t *plane_sse, int64_t *plane_dist) { - const int ref = xd->mi[0]->ref_frame[0]; - - int64_t rate_sum = 0; - int64_t dist_sum = 0; - int64_t total_sse = 0; - - for (int plane = plane_from; plane <= plane_to; ++plane) { - struct macroblock_plane *const p = &x->plane[plane]; - 
struct macroblockd_plane *const pd = &xd->plane[plane]; - const BLOCK_SIZE plane_bsize = - get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); - const int bw = block_size_wide[plane_bsize]; - const int bh = block_size_high[plane_bsize]; - int64_t sse; - int rate; - int64_t dist; - - if (x->skip_chroma_rd && plane) continue; - - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - sse = aom_highbd_sse(p->src.buf, p->src.stride, pd->dst.buf, - pd->dst.stride, bw, bh); - } else { - sse = aom_sse(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, bw, - bh); - } - sse = ROUND_POWER_OF_TWO(sse, (xd->bd - 8) * 2); - - RD_STATS rd_stats; - if (plane == 0) { - select_tx_type_yrd(cpi, x, &rd_stats, bsize, mi_row, mi_col, INT64_MAX); - if (rd_stats.invalid_rate) { - rate = 0; - dist = sse << 4; - } else { - rate = rd_stats.rate; - dist = rd_stats.dist; - } - } else { - model_rd_with_curvfit(cpi, x, plane_bsize, plane, sse, bw * bh, &rate, - &dist); - } - - if (plane == 0) x->pred_sse[ref] = (unsigned int)AOMMIN(sse, UINT_MAX); - - total_sse += sse; - rate_sum += rate; - dist_sum += dist; - - if (plane_rate) plane_rate[plane] = rate; - if (plane_sse) plane_sse[plane] = sse; - if (plane_dist) plane_dist[plane] = dist; - } - - if (skip_txfm_sb) *skip_txfm_sb = total_sse == 0; - if (skip_sse_sb) *skip_sse_sb = total_sse << 4; - *out_rate_sum = (int)rate_sum; - *out_dist_sum = dist_sum; -} - -static int64_t search_txk_type(const AV1_COMP *cpi, MACROBLOCK *x, int plane, - int block, int blk_row, int blk_col, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size, - const TXB_CTX *const txb_ctx, - FAST_TX_SEARCH_MODE ftxs_mode, - int use_fast_coef_costing, int64_t ref_best_rd, - RD_STATS *best_rd_stats) { - const AV1_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - struct macroblockd_plane *const pd = &xd->plane[plane]; - MB_MODE_INFO *mbmi = xd->mi[0]; - const int is_inter = is_inter_block(mbmi); - int64_t best_rd = INT64_MAX; - uint16_t best_eob = 0; - TX_TYPE 
best_tx_type = DCT_DCT; - TX_TYPE last_tx_type = TX_TYPES; - const int fast_tx_search = ftxs_mode & FTXS_DCT_AND_1D_DCT_ONLY; - // The buffer used to swap dqcoeff in macroblockd_plane so we can keep dqcoeff - // of the best tx_type - DECLARE_ALIGNED(32, tran_low_t, this_dqcoeff[MAX_SB_SQUARE]); - tran_low_t *orig_dqcoeff = pd->dqcoeff; - tran_low_t *best_dqcoeff = this_dqcoeff; - const int txk_type_idx = - av1_get_txk_type_index(plane_bsize, blk_row, blk_col); - av1_invalid_rd_stats(best_rd_stats); - - TXB_RD_INFO *intra_txb_rd_info = NULL; - uint16_t cur_joint_ctx = 0; - const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2); - const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2); - const int within_border = - mi_row >= xd->tile.mi_row_start && - (mi_row + mi_size_high[plane_bsize] < xd->tile.mi_row_end) && - mi_col >= xd->tile.mi_col_start && - (mi_col + mi_size_wide[plane_bsize] < xd->tile.mi_col_end); - if (within_border && cpi->sf.use_intra_txb_hash && frame_is_intra_only(cm) && - !is_inter && plane == 0 && - tx_size_wide[tx_size] == tx_size_high[tx_size]) { - const uint32_t intra_hash = - get_intra_txb_hash(x, plane, blk_row, blk_col, plane_bsize, tx_size); - const int intra_hash_idx = - find_tx_size_rd_info(&x->txb_rd_record_intra, intra_hash); - intra_txb_rd_info = &x->txb_rd_record_intra.tx_rd_info[intra_hash_idx]; - - cur_joint_ctx = (txb_ctx->dc_sign_ctx << 8) + txb_ctx->txb_skip_ctx; - if (intra_txb_rd_info->entropy_context == cur_joint_ctx && - x->txb_rd_record_intra.tx_rd_info[intra_hash_idx].valid) { - mbmi->txk_type[txk_type_idx] = intra_txb_rd_info->tx_type; - const TX_TYPE ref_tx_type = - av1_get_tx_type(get_plane_type(plane), &x->e_mbd, blk_row, blk_col, - tx_size, cpi->common.reduced_tx_set_used); - if (ref_tx_type == intra_txb_rd_info->tx_type) { - best_rd_stats->rate = intra_txb_rd_info->rate; - best_rd_stats->dist = intra_txb_rd_info->dist; - best_rd_stats->sse = intra_txb_rd_info->sse; - best_rd_stats->skip = 
intra_txb_rd_info->eob == 0; - x->plane[plane].eobs[block] = intra_txb_rd_info->eob; - x->plane[plane].txb_entropy_ctx[block] = - intra_txb_rd_info->txb_entropy_ctx; - best_rd = RDCOST(x->rdmult, best_rd_stats->rate, best_rd_stats->dist); - best_eob = intra_txb_rd_info->eob; - best_tx_type = intra_txb_rd_info->tx_type; - update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size, - best_tx_type); - goto RECON_INTRA; - } - } - } - - int rate_cost = 0; - TX_TYPE txk_start = DCT_DCT; - TX_TYPE txk_end = TX_TYPES - 1; - if ((!is_inter && x->use_default_intra_tx_type) || - (is_inter && x->use_default_inter_tx_type)) { - txk_start = txk_end = get_default_tx_type(0, xd, tx_size); - } else if (x->rd_model == LOW_TXFM_RD || x->cb_partition_scan) { - if (plane == 0) txk_end = DCT_DCT; - } - - uint8_t best_txb_ctx = 0; - const TxSetType tx_set_type = - av1_get_ext_tx_set_type(tx_size, is_inter, cm->reduced_tx_set_used); - - TX_TYPE uv_tx_type = DCT_DCT; - if (plane) { - // tx_type of PLANE_TYPE_UV should be the same as PLANE_TYPE_Y - uv_tx_type = txk_start = txk_end = - av1_get_tx_type(get_plane_type(plane), xd, blk_row, blk_col, tx_size, - cm->reduced_tx_set_used); - } - const uint16_t ext_tx_used_flag = av1_ext_tx_used_flag[tx_set_type]; - if (xd->lossless[mbmi->segment_id] || txsize_sqr_up_map[tx_size] > TX_32X32 || - ext_tx_used_flag == 0x0001) { - txk_start = txk_end = DCT_DCT; - } - uint16_t allowed_tx_mask = 0; // 1: allow; 0: skip. 
- if (txk_start == txk_end) { - allowed_tx_mask = 1 << txk_start; - allowed_tx_mask &= ext_tx_used_flag; - } else if (fast_tx_search) { - allowed_tx_mask = 0x0c01; // V_DCT, H_DCT, DCT_DCT - allowed_tx_mask &= ext_tx_used_flag; - } else { - assert(plane == 0); - allowed_tx_mask = ext_tx_used_flag; - // !fast_tx_search && txk_end != txk_start && plane == 0 - const int do_prune = cpi->sf.tx_type_search.prune_mode > NO_PRUNE; - if (do_prune && is_inter) { - if (cpi->sf.tx_type_search.prune_mode >= PRUNE_2D_ACCURATE) { - const uint16_t prune = - prune_tx_2D(x, plane_bsize, tx_size, blk_row, blk_col, tx_set_type, - cpi->sf.tx_type_search.prune_mode); - allowed_tx_mask &= (~prune); - } else { - allowed_tx_mask &= (~x->tx_search_prune[tx_set_type]); - } - } - } - // Need to have at least one transform type allowed. - if (allowed_tx_mask == 0) { - txk_start = txk_end = (plane ? uv_tx_type : DCT_DCT); - allowed_tx_mask = (1 << txk_start); - } - - int use_transform_domain_distortion = - (cpi->sf.use_transform_domain_distortion > 0) && - // Any 64-pt transforms only preserves half the coefficients. - // Therefore transform domain distortion is not valid for these - // transform sizes. 
- txsize_sqr_up_map[tx_size] != TX_64X64; -#if CONFIG_DIST_8X8 - if (x->using_dist_8x8) use_transform_domain_distortion = 0; -#endif - int calc_pixel_domain_distortion_final = - cpi->sf.use_transform_domain_distortion == 1 && - use_transform_domain_distortion && x->rd_model != LOW_TXFM_RD && - !x->cb_partition_scan; - if (calc_pixel_domain_distortion_final && - (txk_start == txk_end || allowed_tx_mask == 0x0001)) - calc_pixel_domain_distortion_final = use_transform_domain_distortion = 0; - - const uint16_t *eobs_ptr = x->plane[plane].eobs; - - const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size]; - int64_t block_sse = - pixel_diff_dist(x, plane, blk_row, blk_col, plane_bsize, tx_bsize); - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) - block_sse = ROUND_POWER_OF_TWO(block_sse, (xd->bd - 8) * 2); - block_sse *= 16; - - for (TX_TYPE tx_type = txk_start; tx_type <= txk_end; ++tx_type) { - if (!(allowed_tx_mask & (1 << tx_type))) continue; - if (plane == 0) mbmi->txk_type[txk_type_idx] = tx_type; - RD_STATS this_rd_stats; - av1_invalid_rd_stats(&this_rd_stats); - - if (!cpi->optimize_seg_arr[mbmi->segment_id]) { - av1_xform_quant( - cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, tx_type, - USE_B_QUANT_NO_TRELLIS ? AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP); - rate_cost = av1_cost_coeffs(cm, x, plane, block, tx_size, tx_type, - txb_ctx, use_fast_coef_costing); - } else { - av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, - tx_size, tx_type, AV1_XFORM_QUANT_FP); - if (cpi->sf.optimize_b_precheck && best_rd < INT64_MAX && - eobs_ptr[block] >= 4) { - // Calculate distortion quickly in transform domain. 
- dist_block_tx_domain(x, plane, block, tx_size, &this_rd_stats.dist, - &this_rd_stats.sse); - - const int64_t best_rd_ = AOMMIN(best_rd, ref_best_rd); - const int64_t dist_cost_estimate = - RDCOST(x->rdmult, 0, AOMMIN(this_rd_stats.dist, this_rd_stats.sse)); - if (dist_cost_estimate - (dist_cost_estimate >> 3) > best_rd_) continue; - - rate_cost = av1_cost_coeffs(cm, x, plane, block, tx_size, tx_type, - txb_ctx, use_fast_coef_costing); - const int64_t rd_estimate = - AOMMIN(RDCOST(x->rdmult, rate_cost, this_rd_stats.dist), - RDCOST(x->rdmult, 0, this_rd_stats.sse)); - if (rd_estimate - (rd_estimate >> 3) > best_rd_) continue; - } - av1_optimize_b(cpi, x, plane, block, tx_size, tx_type, txb_ctx, 1, - &rate_cost); - } - if (eobs_ptr[block] == 0) { - // When eob is 0, pixel domain distortion is more efficient and accurate. - this_rd_stats.dist = this_rd_stats.sse = block_sse; - } else if (use_transform_domain_distortion) { - dist_block_tx_domain(x, plane, block, tx_size, &this_rd_stats.dist, - &this_rd_stats.sse); - } else { - this_rd_stats.dist = dist_block_px_domain( - cpi, x, plane, plane_bsize, block, blk_row, blk_col, tx_size); - this_rd_stats.sse = block_sse; - } - - this_rd_stats.rate = rate_cost; - - const int64_t rd = - RDCOST(x->rdmult, this_rd_stats.rate, this_rd_stats.dist); - - if (rd < best_rd) { - best_rd = rd; - *best_rd_stats = this_rd_stats; - best_tx_type = tx_type; - best_txb_ctx = x->plane[plane].txb_entropy_ctx[block]; - best_eob = x->plane[plane].eobs[block]; - last_tx_type = best_tx_type; - - // Swap qcoeff and dqcoeff buffers - tran_low_t *const tmp_dqcoeff = best_dqcoeff; - best_dqcoeff = pd->dqcoeff; - pd->dqcoeff = tmp_dqcoeff; - } - -#if CONFIG_COLLECT_RD_STATS == 1 - if (plane == 0) { - PrintTransformUnitStats(cpi, x, &this_rd_stats, blk_row, blk_col, - plane_bsize, tx_size, tx_type, rd); - } -#endif // CONFIG_COLLECT_RD_STATS == 1 - - if (cpi->sf.adaptive_txb_search_level) { - if ((best_rd - (best_rd >> 
cpi->sf.adaptive_txb_search_level)) > - ref_best_rd) { - break; - } - } - - // Skip transform type search when we found the block has been quantized to - // all zero and at the same time, it has better rdcost than doing transform. - if (cpi->sf.tx_type_search.skip_tx_search && !best_eob) break; - } - - assert(best_rd != INT64_MAX); - - best_rd_stats->skip = best_eob == 0; - if (plane == 0) { - update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size, - best_tx_type); - } - x->plane[plane].txb_entropy_ctx[block] = best_txb_ctx; - x->plane[plane].eobs[block] = best_eob; - - pd->dqcoeff = best_dqcoeff; - - if (calc_pixel_domain_distortion_final && best_eob) { - best_rd_stats->dist = dist_block_px_domain( - cpi, x, plane, plane_bsize, block, blk_row, blk_col, tx_size); - best_rd_stats->sse = block_sse; - } - - if (intra_txb_rd_info != NULL) { - intra_txb_rd_info->valid = 1; - intra_txb_rd_info->entropy_context = cur_joint_ctx; - intra_txb_rd_info->rate = best_rd_stats->rate; - intra_txb_rd_info->dist = best_rd_stats->dist; - intra_txb_rd_info->sse = best_rd_stats->sse; - intra_txb_rd_info->eob = best_eob; - intra_txb_rd_info->txb_entropy_ctx = best_txb_ctx; - if (plane == 0) intra_txb_rd_info->tx_type = best_tx_type; - } - -RECON_INTRA: - if (!is_inter && best_eob && - (blk_row + tx_size_high_unit[tx_size] < mi_size_high[plane_bsize] || - blk_col + tx_size_wide_unit[tx_size] < mi_size_wide[plane_bsize])) { - // intra mode needs decoded result such that the next transform block - // can use it for prediction. - // if the last search tx_type is the best tx_type, we don't need to - // do this again - if (best_tx_type != last_tx_type) { - if (!cpi->optimize_seg_arr[mbmi->segment_id]) { - av1_xform_quant( - cm, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, - best_tx_type, - USE_B_QUANT_NO_TRELLIS ? 
AV1_XFORM_QUANT_B : AV1_XFORM_QUANT_FP); - } else { - av1_xform_quant(cm, x, plane, block, blk_row, blk_col, plane_bsize, - tx_size, best_tx_type, AV1_XFORM_QUANT_FP); - av1_optimize_b(cpi, x, plane, block, tx_size, best_tx_type, txb_ctx, 1, - &rate_cost); - } - } - - inverse_transform_block_facade(xd, plane, block, blk_row, blk_col, - x->plane[plane].eobs[block], - cm->reduced_tx_set_used); - - // This may happen because of hash collision. The eob stored in the hash - // table is non-zero, but the real eob is zero. We need to make sure tx_type - // is DCT_DCT in this case. - if (plane == 0 && x->plane[plane].eobs[block] == 0 && - best_tx_type != DCT_DCT) { - update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size, - DCT_DCT); - } - } - pd->dqcoeff = orig_dqcoeff; - - return best_rd; -} - -static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, - BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { - struct rdcost_block_args *args = arg; - MACROBLOCK *const x = args->x; - MACROBLOCKD *const xd = &x->e_mbd; - const MB_MODE_INFO *const mbmi = xd->mi[0]; - const AV1_COMP *cpi = args->cpi; - ENTROPY_CONTEXT *a = args->t_above + blk_col; - ENTROPY_CONTEXT *l = args->t_left + blk_row; - const AV1_COMMON *cm = &cpi->common; - int64_t rd1, rd2, rd; - RD_STATS this_rd_stats; - - av1_init_rd_stats(&this_rd_stats); - - if (args->exit_early) { - args->incomplete_exit = 1; - return; - } - - if (!is_inter_block(mbmi)) { - av1_predict_intra_block_facade(cm, xd, plane, blk_col, blk_row, tx_size); - av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size); - } - TXB_CTX txb_ctx; - get_txb_ctx(plane_bsize, tx_size, plane, a, l, &txb_ctx); - search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, - &txb_ctx, args->ftxs_mode, args->use_fast_coef_costing, - args->best_rd - args->this_rd, &this_rd_stats); - - if (plane == AOM_PLANE_Y && xd->cfl.store_y) { - assert(!is_inter_block(mbmi) || plane_bsize < BLOCK_8X8); - 
cfl_store_tx(xd, blk_row, blk_col, tx_size, plane_bsize); - } - -#if CONFIG_RD_DEBUG - av1_update_txb_coeff_cost(&this_rd_stats, plane, tx_size, blk_row, blk_col, - this_rd_stats.rate); -#endif // CONFIG_RD_DEBUG - av1_set_txb_context(x, plane, block, tx_size, a, l); - - const int blk_idx = - blk_row * (block_size_wide[plane_bsize] >> tx_size_wide_log2[0]) + - blk_col; - - if (plane == 0) - set_blk_skip(x, plane, blk_idx, x->plane[plane].eobs[block] == 0); - else - set_blk_skip(x, plane, blk_idx, 0); - - rd1 = RDCOST(x->rdmult, this_rd_stats.rate, this_rd_stats.dist); - rd2 = RDCOST(x->rdmult, 0, this_rd_stats.sse); - - // TODO(jingning): temporarily enabled only for luma component - rd = AOMMIN(rd1, rd2); - - this_rd_stats.skip &= !x->plane[plane].eobs[block]; - - av1_merge_rd_stats(&args->rd_stats, &this_rd_stats); - - args->this_rd += rd; - - if (args->this_rd > args->best_rd) { - args->exit_early = 1; - return; - } -} - -static void txfm_rd_in_plane(MACROBLOCK *x, const AV1_COMP *cpi, - RD_STATS *rd_stats, int64_t ref_best_rd, int plane, - BLOCK_SIZE bsize, TX_SIZE tx_size, - int use_fast_coef_casting, - FAST_TX_SEARCH_MODE ftxs_mode) { - MACROBLOCKD *const xd = &x->e_mbd; - const struct macroblockd_plane *const pd = &xd->plane[plane]; - struct rdcost_block_args args; - av1_zero(args); - args.x = x; - args.cpi = cpi; - args.best_rd = ref_best_rd; - args.use_fast_coef_costing = use_fast_coef_casting; - args.ftxs_mode = ftxs_mode; - av1_init_rd_stats(&args.rd_stats); - - if (plane == 0) xd->mi[0]->tx_size = tx_size; - - av1_get_entropy_contexts(bsize, pd, args.t_above, args.t_left); - - av1_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm, - &args); - - MB_MODE_INFO *const mbmi = xd->mi[0]; - const int is_inter = is_inter_block(mbmi); - const int invalid_rd = is_inter ? 
args.incomplete_exit : args.exit_early; - - if (invalid_rd) { - av1_invalid_rd_stats(rd_stats); - } else { - *rd_stats = args.rd_stats; - } -} - -static int tx_size_cost(const AV1_COMMON *const cm, const MACROBLOCK *const x, - BLOCK_SIZE bsize, TX_SIZE tx_size) { - const MACROBLOCKD *const xd = &x->e_mbd; - const MB_MODE_INFO *const mbmi = xd->mi[0]; - - if (cm->tx_mode == TX_MODE_SELECT && block_signals_txsize(mbmi->sb_type)) { - const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize); - const int depth = tx_size_to_depth(tx_size, bsize); - const int tx_size_ctx = get_tx_size_context(xd); - int r_tx_size = x->tx_size_cost[tx_size_cat][tx_size_ctx][depth]; - return r_tx_size; - } else { - return 0; - } -} - -static int64_t txfm_yrd(const AV1_COMP *const cpi, MACROBLOCK *x, - RD_STATS *rd_stats, int64_t ref_best_rd, BLOCK_SIZE bs, - TX_SIZE tx_size, FAST_TX_SEARCH_MODE ftxs_mode) { - const AV1_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - int64_t rd = INT64_MAX; - const int skip_ctx = av1_get_skip_context(xd); - int s0, s1; - const int is_inter = is_inter_block(mbmi); - const int tx_select = - cm->tx_mode == TX_MODE_SELECT && block_signals_txsize(mbmi->sb_type); - int ctx = txfm_partition_context( - xd->above_txfm_context, xd->left_txfm_context, mbmi->sb_type, tx_size); - const int r_tx_size = is_inter ? 
x->txfm_partition_cost[ctx][0] - : tx_size_cost(cm, x, bs, tx_size); - - assert(IMPLIES(is_rect_tx(tx_size), is_rect_tx_allowed_bsize(bs))); - - s0 = x->skip_cost[skip_ctx][0]; - s1 = x->skip_cost[skip_ctx][1]; - - mbmi->tx_size = tx_size; - txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, AOM_PLANE_Y, bs, tx_size, - cpi->sf.use_fast_coef_costing, ftxs_mode); - if (rd_stats->rate == INT_MAX) return INT64_MAX; - - if (rd_stats->skip) { - if (is_inter) { - rd = RDCOST(x->rdmult, s1, rd_stats->sse); - } else { - rd = RDCOST(x->rdmult, s1 + r_tx_size * tx_select, rd_stats->sse); - } - } else { - rd = RDCOST(x->rdmult, rd_stats->rate + s0 + r_tx_size * tx_select, - rd_stats->dist); - } - - if (tx_select) rd_stats->rate += r_tx_size; - - if (is_inter && !xd->lossless[xd->mi[0]->segment_id] && !(rd_stats->skip)) - rd = AOMMIN(rd, RDCOST(x->rdmult, s1, rd_stats->sse)); - - return rd; -} - -static int64_t estimate_yrd_for_sb(const AV1_COMP *const cpi, BLOCK_SIZE bs, - MACROBLOCK *x, int *r, int64_t *d, int *s, - int64_t *sse, int64_t ref_best_rd) { - RD_STATS rd_stats; - av1_subtract_plane(x, bs, 0); - x->rd_model = LOW_TXFM_RD; - int64_t rd = txfm_yrd(cpi, x, &rd_stats, ref_best_rd, bs, - max_txsize_rect_lookup[bs], FTXS_NONE); - x->rd_model = FULL_TXFM_RD; - *r = rd_stats.rate; - *d = rd_stats.dist; - *s = rd_stats.skip; - *sse = rd_stats.sse; - return rd; -} - -static void choose_largest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x, - RD_STATS *rd_stats, int64_t ref_best_rd, - BLOCK_SIZE bs) { - const AV1_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - const int is_inter = is_inter_block(mbmi); - mbmi->tx_size = tx_size_from_tx_mode(bs, cm->tx_mode); - const TxSetType tx_set_type = - av1_get_ext_tx_set_type(mbmi->tx_size, is_inter, cm->reduced_tx_set_used); - prune_tx(cpi, bs, x, xd, tx_set_type); - txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, AOM_PLANE_Y, bs, - mbmi->tx_size, 
cpi->sf.use_fast_coef_costing, FTXS_NONE); - // Reset the pruning flags. - av1_zero(x->tx_search_prune); - x->tx_split_prune_flag = 0; -} - -static void choose_smallest_tx_size(const AV1_COMP *const cpi, MACROBLOCK *x, - RD_STATS *rd_stats, int64_t ref_best_rd, - BLOCK_SIZE bs) { - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - - mbmi->tx_size = TX_4X4; - txfm_rd_in_plane(x, cpi, rd_stats, ref_best_rd, 0, bs, mbmi->tx_size, - cpi->sf.use_fast_coef_costing, FTXS_NONE); -} - -static INLINE int bsize_to_num_blk(BLOCK_SIZE bsize) { - int num_blk = 1 << (num_pels_log2_lookup[bsize] - 2 * tx_size_wide_log2[0]); - return num_blk; -} - -static int get_search_init_depth(int mi_width, int mi_height, int is_inter, - const SPEED_FEATURES *sf) { - if (sf->tx_size_search_method == USE_LARGESTALL) return MAX_VARTX_DEPTH; - - if (sf->tx_size_search_lgr_block) { - if (mi_width > mi_size_wide[BLOCK_64X64] || - mi_height > mi_size_high[BLOCK_64X64]) - return MAX_VARTX_DEPTH; - } - - if (is_inter) { - return (mi_height != mi_width) ? sf->inter_tx_size_search_init_depth_rect - : sf->inter_tx_size_search_init_depth_sqr; - } else { - return (mi_height != mi_width) ? 
sf->intra_tx_size_search_init_depth_rect - : sf->intra_tx_size_search_init_depth_sqr; - } -} - -static void choose_tx_size_type_from_rd(const AV1_COMP *const cpi, - MACROBLOCK *x, RD_STATS *rd_stats, - int64_t ref_best_rd, BLOCK_SIZE bs) { - const AV1_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - int64_t rd = INT64_MAX; - int n; - int start_tx; - int depth; - int64_t best_rd = INT64_MAX; - const TX_SIZE max_rect_tx_size = max_txsize_rect_lookup[bs]; - TX_SIZE best_tx_size = max_rect_tx_size; - TX_TYPE best_txk_type[TXK_TYPE_BUF_LEN]; - uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE]; - const int n4 = bsize_to_num_blk(bs); - const int tx_select = cm->tx_mode == TX_MODE_SELECT; - - av1_invalid_rd_stats(rd_stats); - - if (tx_select) { - start_tx = max_rect_tx_size; - depth = get_search_init_depth(mi_size_wide[bs], mi_size_high[bs], - is_inter_block(mbmi), &cpi->sf); - } else { - const TX_SIZE chosen_tx_size = tx_size_from_tx_mode(bs, cm->tx_mode); - start_tx = chosen_tx_size; - depth = MAX_TX_DEPTH; - } - - prune_tx(cpi, bs, x, xd, EXT_TX_SET_ALL16); - - for (n = start_tx; depth <= MAX_TX_DEPTH; depth++, n = sub_tx_size_map[n]) { -#if CONFIG_DIST_8X8 - if (x->using_dist_8x8) { - if (tx_size_wide[n] < 8 || tx_size_high[n] < 8) continue; - } -#endif - RD_STATS this_rd_stats; - if (mbmi->ref_mv_idx > 0) x->rd_model = LOW_TXFM_RD; - rd = txfm_yrd(cpi, x, &this_rd_stats, ref_best_rd, bs, n, FTXS_NONE); - x->rd_model = FULL_TXFM_RD; - - if (rd < best_rd) { - memcpy(best_txk_type, mbmi->txk_type, - sizeof(best_txk_type[0]) * TXK_TYPE_BUF_LEN); - memcpy(best_blk_skip, x->blk_skip, sizeof(best_blk_skip[0]) * n4); - best_tx_size = n; - best_rd = rd; - *rd_stats = this_rd_stats; - } - if (n == TX_4X4) break; - } - - if (rd_stats->rate != INT_MAX) { - mbmi->tx_size = best_tx_size; - memcpy(mbmi->txk_type, best_txk_type, - sizeof(best_txk_type[0]) * TXK_TYPE_BUF_LEN); - memcpy(x->blk_skip, best_blk_skip, 
sizeof(best_blk_skip[0]) * n4); - } - - // Reset the pruning flags. - av1_zero(x->tx_search_prune); - x->tx_split_prune_flag = 0; -} - -static void super_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x, - RD_STATS *rd_stats, BLOCK_SIZE bs, - int64_t ref_best_rd) { - MACROBLOCKD *xd = &x->e_mbd; - av1_init_rd_stats(rd_stats); - - assert(bs == xd->mi[0]->sb_type); - - if (xd->lossless[xd->mi[0]->segment_id]) { - choose_smallest_tx_size(cpi, x, rd_stats, ref_best_rd, bs); - } else if (cpi->sf.tx_size_search_method == USE_LARGESTALL) { - choose_largest_tx_size(cpi, x, rd_stats, ref_best_rd, bs); - } else { - choose_tx_size_type_from_rd(cpi, x, rd_stats, ref_best_rd, bs); - } -} - -// Return the rate cost for luma prediction mode info. of intra blocks. -static int intra_mode_info_cost_y(const AV1_COMP *cpi, const MACROBLOCK *x, - const MB_MODE_INFO *mbmi, BLOCK_SIZE bsize, - int mode_cost) { - int total_rate = mode_cost; - const int use_palette = mbmi->palette_mode_info.palette_size[0] > 0; - const int use_filter_intra = mbmi->filter_intra_mode_info.use_filter_intra; - const int use_intrabc = mbmi->use_intrabc; - // Can only activate one mode. 
- assert(((mbmi->mode != DC_PRED) + use_palette + use_intrabc + - use_filter_intra) <= 1); - const int try_palette = - av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type); - if (try_palette && mbmi->mode == DC_PRED) { - const MACROBLOCKD *xd = &x->e_mbd; - const int bsize_ctx = av1_get_palette_bsize_ctx(bsize); - const int mode_ctx = av1_get_palette_mode_ctx(xd); - total_rate += x->palette_y_mode_cost[bsize_ctx][mode_ctx][use_palette]; - if (use_palette) { - const uint8_t *const color_map = xd->plane[0].color_index_map; - int block_width, block_height, rows, cols; - av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows, - &cols); - const int plt_size = mbmi->palette_mode_info.palette_size[0]; - int palette_mode_cost = - x->palette_y_size_cost[bsize_ctx][plt_size - PALETTE_MIN_SIZE] + - write_uniform_cost(plt_size, color_map[0]); - uint16_t color_cache[2 * PALETTE_MAX_SIZE]; - const int n_cache = av1_get_palette_cache(xd, 0, color_cache); - palette_mode_cost += - av1_palette_color_cost_y(&mbmi->palette_mode_info, color_cache, - n_cache, cpi->common.seq_params.bit_depth); - palette_mode_cost += - av1_cost_color_map(x, 0, bsize, mbmi->tx_size, PALETTE_MAP); - total_rate += palette_mode_cost; - } - } - if (av1_filter_intra_allowed(&cpi->common, mbmi)) { - total_rate += x->filter_intra_cost[mbmi->sb_type][use_filter_intra]; - if (use_filter_intra) { - total_rate += x->filter_intra_mode_cost[mbmi->filter_intra_mode_info - .filter_intra_mode]; - } - } - if (av1_is_directional_mode(mbmi->mode)) { - if (av1_use_angle_delta(bsize)) { - total_rate += x->angle_delta_cost[mbmi->mode - V_PRED] - [MAX_ANGLE_DELTA + - mbmi->angle_delta[PLANE_TYPE_Y]]; - } - } - if (av1_allow_intrabc(&cpi->common)) - total_rate += x->intrabc_cost[use_intrabc]; - return total_rate; -} - -// Return the rate cost for chroma prediction mode info. of intra blocks. 
-static int intra_mode_info_cost_uv(const AV1_COMP *cpi, const MACROBLOCK *x, - const MB_MODE_INFO *mbmi, BLOCK_SIZE bsize, - int mode_cost) { - int total_rate = mode_cost; - const int use_palette = mbmi->palette_mode_info.palette_size[1] > 0; - const UV_PREDICTION_MODE mode = mbmi->uv_mode; - // Can only activate one mode. - assert(((mode != UV_DC_PRED) + use_palette + mbmi->use_intrabc) <= 1); - - const int try_palette = - av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type); - if (try_palette && mode == UV_DC_PRED) { - const PALETTE_MODE_INFO *pmi = &mbmi->palette_mode_info; - total_rate += - x->palette_uv_mode_cost[pmi->palette_size[0] > 0][use_palette]; - if (use_palette) { - const int bsize_ctx = av1_get_palette_bsize_ctx(bsize); - const int plt_size = pmi->palette_size[1]; - const MACROBLOCKD *xd = &x->e_mbd; - const uint8_t *const color_map = xd->plane[1].color_index_map; - int palette_mode_cost = - x->palette_uv_size_cost[bsize_ctx][plt_size - PALETTE_MIN_SIZE] + - write_uniform_cost(plt_size, color_map[0]); - uint16_t color_cache[2 * PALETTE_MAX_SIZE]; - const int n_cache = av1_get_palette_cache(xd, 1, color_cache); - palette_mode_cost += av1_palette_color_cost_uv( - pmi, color_cache, n_cache, cpi->common.seq_params.bit_depth); - palette_mode_cost += - av1_cost_color_map(x, 1, bsize, mbmi->tx_size, PALETTE_MAP); - total_rate += palette_mode_cost; - } - } - if (av1_is_directional_mode(get_uv_mode(mode))) { - if (av1_use_angle_delta(bsize)) { - total_rate += - x->angle_delta_cost[mode - V_PRED][mbmi->angle_delta[PLANE_TYPE_UV] + - MAX_ANGLE_DELTA]; - } - } - return total_rate; -} - -static int conditional_skipintra(PREDICTION_MODE mode, - PREDICTION_MODE best_intra_mode) { - if (mode == D113_PRED && best_intra_mode != V_PRED && - best_intra_mode != D135_PRED) - return 1; - if (mode == D67_PRED && best_intra_mode != V_PRED && - best_intra_mode != D45_PRED) - return 1; - if (mode == D203_PRED && best_intra_mode != H_PRED && - 
best_intra_mode != D45_PRED) - return 1; - if (mode == D157_PRED && best_intra_mode != H_PRED && - best_intra_mode != D135_PRED) - return 1; - return 0; -} - -// Model based RD estimation for luma intra blocks. -static int64_t intra_model_yrd(const AV1_COMP *const cpi, MACROBLOCK *const x, - BLOCK_SIZE bsize, int mode_cost, int mi_row, - int mi_col) { - const AV1_COMMON *cm = &cpi->common; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - assert(!is_inter_block(mbmi)); - RD_STATS this_rd_stats; - int row, col; - int64_t temp_sse, this_rd; - TX_SIZE tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode); - const int stepr = tx_size_high_unit[tx_size]; - const int stepc = tx_size_wide_unit[tx_size]; - const int max_blocks_wide = max_block_wide(xd, bsize, 0); - const int max_blocks_high = max_block_high(xd, bsize, 0); - mbmi->tx_size = tx_size; - // Prediction. - for (row = 0; row < max_blocks_high; row += stepr) { - for (col = 0; col < max_blocks_wide; col += stepc) { - av1_predict_intra_block_facade(cm, xd, 0, col, row, tx_size); - } - } - // RD estimation. 
- model_rd_sb_fn[MODELRD_TYPE_INTRA]( - cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &this_rd_stats.rate, - &this_rd_stats.dist, &this_rd_stats.skip, &temp_sse, NULL, NULL, NULL); - if (av1_is_directional_mode(mbmi->mode) && av1_use_angle_delta(bsize)) { - mode_cost += - x->angle_delta_cost[mbmi->mode - V_PRED] - [MAX_ANGLE_DELTA + mbmi->angle_delta[PLANE_TYPE_Y]]; - } - if (mbmi->mode == DC_PRED && - av1_filter_intra_allowed_bsize(cm, mbmi->sb_type)) { - if (mbmi->filter_intra_mode_info.use_filter_intra) { - const int mode = mbmi->filter_intra_mode_info.filter_intra_mode; - mode_cost += x->filter_intra_cost[mbmi->sb_type][1] + - x->filter_intra_mode_cost[mode]; - } else { - mode_cost += x->filter_intra_cost[mbmi->sb_type][0]; - } - } - this_rd = - RDCOST(x->rdmult, this_rd_stats.rate + mode_cost, this_rd_stats.dist); - return this_rd; -} - -// Extends 'color_map' array from 'orig_width x orig_height' to 'new_width x -// new_height'. Extra rows and columns are filled in by copying last valid -// row/column. -static void extend_palette_color_map(uint8_t *const color_map, int orig_width, - int orig_height, int new_width, - int new_height) { - int j; - assert(new_width >= orig_width); - assert(new_height >= orig_height); - if (new_width == orig_width && new_height == orig_height) return; - - for (j = orig_height - 1; j >= 0; --j) { - memmove(color_map + j * new_width, color_map + j * orig_width, orig_width); - // Copy last column to extra columns. - memset(color_map + j * new_width + orig_width, - color_map[j * new_width + orig_width - 1], new_width - orig_width); - } - // Copy last row to extra rows. - for (j = orig_height; j < new_height; ++j) { - memcpy(color_map + j * new_width, color_map + (orig_height - 1) * new_width, - new_width); - } -} - -// Bias toward using colors in the cache. -// TODO(huisu): Try other schemes to improve compression. 
-static void optimize_palette_colors(uint16_t *color_cache, int n_cache, - int n_colors, int stride, int *centroids) { - if (n_cache <= 0) return; - for (int i = 0; i < n_colors * stride; i += stride) { - int min_diff = abs(centroids[i] - (int)color_cache[0]); - int idx = 0; - for (int j = 1; j < n_cache; ++j) { - const int this_diff = abs(centroids[i] - color_cache[j]); - if (this_diff < min_diff) { - min_diff = this_diff; - idx = j; - } - } - if (min_diff <= 1) centroids[i] = color_cache[idx]; - } -} - -// Given the base colors as specified in centroids[], calculate the RD cost -// of palette mode. -static void palette_rd_y(const AV1_COMP *const cpi, MACROBLOCK *x, - MB_MODE_INFO *mbmi, BLOCK_SIZE bsize, int mi_row, - int mi_col, int dc_mode_cost, const int *data, - int *centroids, int n, uint16_t *color_cache, - int n_cache, MB_MODE_INFO *best_mbmi, - uint8_t *best_palette_color_map, int64_t *best_rd, - int64_t *best_model_rd, int *rate, int *rate_tokenonly, - int *rate_overhead, int64_t *distortion, - int *skippable, PICK_MODE_CONTEXT *ctx, - uint8_t *blk_skip) { - optimize_palette_colors(color_cache, n_cache, n, 1, centroids); - int k = av1_remove_duplicates(centroids, n); - if (k < PALETTE_MIN_SIZE) { - // Too few unique colors to create a palette. And DC_PRED will work - // well for that case anyway. So skip. 
- return; - } - PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; - if (cpi->common.seq_params.use_highbitdepth) - for (int i = 0; i < k; ++i) - pmi->palette_colors[i] = clip_pixel_highbd( - (int)centroids[i], cpi->common.seq_params.bit_depth); - else - for (int i = 0; i < k; ++i) - pmi->palette_colors[i] = clip_pixel(centroids[i]); - pmi->palette_size[0] = k; - MACROBLOCKD *const xd = &x->e_mbd; - uint8_t *const color_map = xd->plane[0].color_index_map; - int block_width, block_height, rows, cols; - av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows, - &cols); - av1_calc_indices(data, centroids, color_map, rows * cols, k, 1); - extend_palette_color_map(color_map, cols, rows, block_width, block_height); - const int palette_mode_cost = - intra_mode_info_cost_y(cpi, x, mbmi, bsize, dc_mode_cost); - int64_t this_model_rd = - intra_model_yrd(cpi, x, bsize, palette_mode_cost, mi_row, mi_col); - if (*best_model_rd != INT64_MAX && - this_model_rd > *best_model_rd + (*best_model_rd >> 1)) - return; - if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd; - RD_STATS tokenonly_rd_stats; - super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd); - if (tokenonly_rd_stats.rate == INT_MAX) return; - int this_rate = tokenonly_rd_stats.rate + palette_mode_cost; - int64_t this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist); - if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(mbmi->sb_type)) { - tokenonly_rd_stats.rate -= - tx_size_cost(&cpi->common, x, bsize, mbmi->tx_size); - } - if (this_rd < *best_rd) { - *best_rd = this_rd; - memcpy(best_palette_color_map, color_map, - block_width * block_height * sizeof(color_map[0])); - *best_mbmi = *mbmi; - memcpy(blk_skip, x->blk_skip, sizeof(x->blk_skip[0]) * ctx->num_4x4_blk); - *rate_overhead = this_rate - tokenonly_rd_stats.rate; - if (rate) *rate = this_rate; - if (rate_tokenonly) *rate_tokenonly = tokenonly_rd_stats.rate; - if (distortion) *distortion = 
tokenonly_rd_stats.dist; - if (skippable) *skippable = tokenonly_rd_stats.skip; - } -} - -static int rd_pick_palette_intra_sby( - const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, - int mi_col, int dc_mode_cost, MB_MODE_INFO *best_mbmi, - uint8_t *best_palette_color_map, int64_t *best_rd, int64_t *best_model_rd, - int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable, - PICK_MODE_CONTEXT *ctx, uint8_t *best_blk_skip) { - int rate_overhead = 0; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - assert(!is_inter_block(mbmi)); - assert(av1_allow_palette(cpi->common.allow_screen_content_tools, bsize)); - const SequenceHeader *const seq_params = &cpi->common.seq_params; - int colors, n; - const int src_stride = x->plane[0].src.stride; - const uint8_t *const src = x->plane[0].src.buf; - uint8_t *const color_map = xd->plane[0].color_index_map; - int block_width, block_height, rows, cols; - av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows, - &cols); - - int count_buf[1 << 12]; // Maximum (1 << 12) color levels. 
- if (seq_params->use_highbitdepth) - colors = av1_count_colors_highbd(src, src_stride, rows, cols, - seq_params->bit_depth, count_buf); - else - colors = av1_count_colors(src, src_stride, rows, cols, count_buf); - mbmi->filter_intra_mode_info.use_filter_intra = 0; - - if (colors > 1 && colors <= 64) { - int r, c, i; - const int max_itr = 50; - int *const data = x->palette_buffer->kmeans_data_buf; - int centroids[PALETTE_MAX_SIZE]; - int lb, ub, val; - uint16_t *src16 = CONVERT_TO_SHORTPTR(src); - if (seq_params->use_highbitdepth) - lb = ub = src16[0]; - else - lb = ub = src[0]; - - if (seq_params->use_highbitdepth) { - for (r = 0; r < rows; ++r) { - for (c = 0; c < cols; ++c) { - val = src16[r * src_stride + c]; - data[r * cols + c] = val; - if (val < lb) - lb = val; - else if (val > ub) - ub = val; - } - } - } else { - for (r = 0; r < rows; ++r) { - for (c = 0; c < cols; ++c) { - val = src[r * src_stride + c]; - data[r * cols + c] = val; - if (val < lb) - lb = val; - else if (val > ub) - ub = val; - } - } - } - - mbmi->mode = DC_PRED; - mbmi->filter_intra_mode_info.use_filter_intra = 0; - - uint16_t color_cache[2 * PALETTE_MAX_SIZE]; - const int n_cache = av1_get_palette_cache(xd, 0, color_cache); - - // Find the dominant colors, stored in top_colors[]. - int top_colors[PALETTE_MAX_SIZE] = { 0 }; - for (i = 0; i < AOMMIN(colors, PALETTE_MAX_SIZE); ++i) { - int max_count = 0; - for (int j = 0; j < (1 << seq_params->bit_depth); ++j) { - if (count_buf[j] > max_count) { - max_count = count_buf[j]; - top_colors[i] = j; - } - } - assert(max_count > 0); - count_buf[top_colors[i]] = 0; - } - - // Try the dominant colors directly. - // TODO(huisu@google.com): Try to avoid duplicate computation in cases - // where the dominant colors and the k-means results are similar. 
- for (n = AOMMIN(colors, PALETTE_MAX_SIZE); n >= 2; --n) { - for (i = 0; i < n; ++i) centroids[i] = top_colors[i]; - palette_rd_y(cpi, x, mbmi, bsize, mi_row, mi_col, dc_mode_cost, data, - centroids, n, color_cache, n_cache, best_mbmi, - best_palette_color_map, best_rd, best_model_rd, rate, - rate_tokenonly, &rate_overhead, distortion, skippable, ctx, - best_blk_skip); - } - - // K-means clustering. - for (n = AOMMIN(colors, PALETTE_MAX_SIZE); n >= 2; --n) { - if (colors == PALETTE_MIN_SIZE) { - // Special case: These colors automatically become the centroids. - assert(colors == n); - assert(colors == 2); - centroids[0] = lb; - centroids[1] = ub; - } else { - for (i = 0; i < n; ++i) { - centroids[i] = lb + (2 * i + 1) * (ub - lb) / n / 2; - } - av1_k_means(data, centroids, color_map, rows * cols, n, 1, max_itr); - } - palette_rd_y(cpi, x, mbmi, bsize, mi_row, mi_col, dc_mode_cost, data, - centroids, n, color_cache, n_cache, best_mbmi, - best_palette_color_map, best_rd, best_model_rd, rate, - rate_tokenonly, &rate_overhead, distortion, skippable, ctx, - best_blk_skip); - } - } - - if (best_mbmi->palette_mode_info.palette_size[0] > 0) { - memcpy(color_map, best_palette_color_map, - block_width * block_height * sizeof(best_palette_color_map[0])); - } - *mbmi = *best_mbmi; - return rate_overhead; -} - -// Return 1 if an filter intra mode is selected; return 0 otherwise. 
-static int rd_pick_filter_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x, - int mi_row, int mi_col, int *rate, - int *rate_tokenonly, int64_t *distortion, - int *skippable, BLOCK_SIZE bsize, - int mode_cost, int64_t *best_rd, - int64_t *best_model_rd, - PICK_MODE_CONTEXT *ctx) { - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *mbmi = xd->mi[0]; - int filter_intra_selected_flag = 0; - FILTER_INTRA_MODE mode; - TX_SIZE best_tx_size = TX_8X8; - FILTER_INTRA_MODE_INFO filter_intra_mode_info; - TX_TYPE best_txk_type[TXK_TYPE_BUF_LEN]; - (void)ctx; - av1_zero(filter_intra_mode_info); - mbmi->filter_intra_mode_info.use_filter_intra = 1; - mbmi->mode = DC_PRED; - mbmi->palette_mode_info.palette_size[0] = 0; - - for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) { - int64_t this_rd, this_model_rd; - RD_STATS tokenonly_rd_stats; - mbmi->filter_intra_mode_info.filter_intra_mode = mode; - this_model_rd = intra_model_yrd(cpi, x, bsize, mode_cost, mi_row, mi_col); - if (*best_model_rd != INT64_MAX && - this_model_rd > *best_model_rd + (*best_model_rd >> 1)) - continue; - if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd; - super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd); - if (tokenonly_rd_stats.rate == INT_MAX) continue; - const int this_rate = - tokenonly_rd_stats.rate + - intra_mode_info_cost_y(cpi, x, mbmi, bsize, mode_cost); - this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist); - - if (this_rd < *best_rd) { - *best_rd = this_rd; - best_tx_size = mbmi->tx_size; - filter_intra_mode_info = mbmi->filter_intra_mode_info; - memcpy(best_txk_type, mbmi->txk_type, - sizeof(best_txk_type[0]) * TXK_TYPE_BUF_LEN); - memcpy(ctx->blk_skip, x->blk_skip, - sizeof(x->blk_skip[0]) * ctx->num_4x4_blk); - *rate = this_rate; - *rate_tokenonly = tokenonly_rd_stats.rate; - *distortion = tokenonly_rd_stats.dist; - *skippable = tokenonly_rd_stats.skip; - filter_intra_selected_flag = 1; - } - } - - if (filter_intra_selected_flag) { - mbmi->mode = 
DC_PRED; - mbmi->tx_size = best_tx_size; - mbmi->filter_intra_mode_info = filter_intra_mode_info; - memcpy(mbmi->txk_type, best_txk_type, - sizeof(best_txk_type[0]) * TXK_TYPE_BUF_LEN); - return 1; - } else { - return 0; - } -} - -// Run RD calculation with given luma intra prediction angle., and return -// the RD cost. Update the best mode info. if the RD cost is the best so far. -static int64_t calc_rd_given_intra_angle( - const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, - int mi_col, int mode_cost, int64_t best_rd_in, int8_t angle_delta, - int max_angle_delta, int *rate, RD_STATS *rd_stats, int *best_angle_delta, - TX_SIZE *best_tx_size, int64_t *best_rd, int64_t *best_model_rd, - TX_TYPE *best_txk_type, uint8_t *best_blk_skip) { - RD_STATS tokenonly_rd_stats; - int64_t this_rd, this_model_rd; - MB_MODE_INFO *mbmi = x->e_mbd.mi[0]; - const int n4 = bsize_to_num_blk(bsize); - assert(!is_inter_block(mbmi)); - mbmi->angle_delta[PLANE_TYPE_Y] = angle_delta; - this_model_rd = intra_model_yrd(cpi, x, bsize, mode_cost, mi_row, mi_col); - if (*best_model_rd != INT64_MAX && - this_model_rd > *best_model_rd + (*best_model_rd >> 1)) - return INT64_MAX; - if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd; - super_block_yrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in); - if (tokenonly_rd_stats.rate == INT_MAX) return INT64_MAX; - - int this_rate = - mode_cost + tokenonly_rd_stats.rate + - x->angle_delta_cost[mbmi->mode - V_PRED][max_angle_delta + angle_delta]; - this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist); - - if (this_rd < *best_rd) { - memcpy(best_txk_type, mbmi->txk_type, - sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN); - memcpy(best_blk_skip, x->blk_skip, sizeof(best_blk_skip[0]) * n4); - *best_rd = this_rd; - *best_angle_delta = mbmi->angle_delta[PLANE_TYPE_Y]; - *best_tx_size = mbmi->tx_size; - *rate = this_rate; - rd_stats->rate = tokenonly_rd_stats.rate; - rd_stats->dist = tokenonly_rd_stats.dist; - 
rd_stats->skip = tokenonly_rd_stats.skip; - } - return this_rd; -} - -// With given luma directional intra prediction mode, pick the best angle delta -// Return the RD cost corresponding to the best angle delta. -static int64_t rd_pick_intra_angle_sby(const AV1_COMP *const cpi, MACROBLOCK *x, - int mi_row, int mi_col, int *rate, - RD_STATS *rd_stats, BLOCK_SIZE bsize, - int mode_cost, int64_t best_rd, - int64_t *best_model_rd) { - MB_MODE_INFO *mbmi = x->e_mbd.mi[0]; - assert(!is_inter_block(mbmi)); - - int best_angle_delta = 0; - int64_t rd_cost[2 * (MAX_ANGLE_DELTA + 2)]; - TX_SIZE best_tx_size = mbmi->tx_size; - TX_TYPE best_txk_type[TXK_TYPE_BUF_LEN]; - uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE]; - - for (int i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX; - - int first_try = 1; - for (int angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) { - for (int i = 0; i < 2; ++i) { - const int64_t best_rd_in = - (best_rd == INT64_MAX) ? INT64_MAX - : (best_rd + (best_rd >> (first_try ? 
3 : 5))); - const int64_t this_rd = calc_rd_given_intra_angle( - cpi, x, bsize, mi_row, mi_col, mode_cost, best_rd_in, - (1 - 2 * i) * angle_delta, MAX_ANGLE_DELTA, rate, rd_stats, - &best_angle_delta, &best_tx_size, &best_rd, best_model_rd, - best_txk_type, best_blk_skip); - rd_cost[2 * angle_delta + i] = this_rd; - if (first_try && this_rd == INT64_MAX) return best_rd; - first_try = 0; - if (angle_delta == 0) { - rd_cost[1] = this_rd; - break; - } - } - } - - assert(best_rd != INT64_MAX); - for (int angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) { - for (int i = 0; i < 2; ++i) { - int skip_search = 0; - const int64_t rd_thresh = best_rd + (best_rd >> 5); - if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh && - rd_cost[2 * (angle_delta - 1) + i] > rd_thresh) - skip_search = 1; - if (!skip_search) { - calc_rd_given_intra_angle(cpi, x, bsize, mi_row, mi_col, mode_cost, - best_rd, (1 - 2 * i) * angle_delta, - MAX_ANGLE_DELTA, rate, rd_stats, - &best_angle_delta, &best_tx_size, &best_rd, - best_model_rd, best_txk_type, best_blk_skip); - } - } - } - - if (rd_stats->rate != INT_MAX) { - mbmi->tx_size = best_tx_size; - mbmi->angle_delta[PLANE_TYPE_Y] = best_angle_delta; - memcpy(mbmi->txk_type, best_txk_type, - sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN); - memcpy(x->blk_skip, best_blk_skip, - sizeof(best_blk_skip[0]) * bsize_to_num_blk(bsize)); - } - return best_rd; -} - -// Indices are sign, integer, and fractional part of the gradient value -static const uint8_t gradient_to_angle_bin[2][7][16] = { - { - { 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0 }, - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 }, - { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, - { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, - { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, - { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }, - { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }, - }, - { - { 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4 }, - { 4, 4, 4, 
4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3 }, - { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 }, - { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 }, - { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3 }, - { 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2 }, - { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }, - }, -}; - -/* clang-format off */ -static const uint8_t mode_to_angle_bin[INTRA_MODES] = { - 0, 2, 6, 0, 4, 3, 5, 7, 1, 0, - 0, -}; -/* clang-format on */ - -static void angle_estimation(const uint8_t *src, int src_stride, int rows, - int cols, BLOCK_SIZE bsize, - uint8_t *directional_mode_skip_mask) { - memset(directional_mode_skip_mask, 0, - INTRA_MODES * sizeof(*directional_mode_skip_mask)); - // Check if angle_delta is used - if (!av1_use_angle_delta(bsize)) return; - uint64_t hist[DIRECTIONAL_MODES]; - memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0])); - src += src_stride; - int r, c, dx, dy; - for (r = 1; r < rows; ++r) { - for (c = 1; c < cols; ++c) { - dx = src[c] - src[c - 1]; - dy = src[c] - src[c - src_stride]; - int index; - const int temp = dx * dx + dy * dy; - if (dy == 0) { - index = 2; - } else { - const int sn = (dx > 0) ^ (dy > 0); - dx = abs(dx); - dy = abs(dy); - const int remd = (dx % dy) * 16 / dy; - const int quot = dx / dy; - index = gradient_to_angle_bin[sn][AOMMIN(quot, 6)][AOMMIN(remd, 15)]; - } - hist[index] += temp; - } - src += src_stride; - } - - int i; - uint64_t hist_sum = 0; - for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i]; - for (i = 0; i < INTRA_MODES; ++i) { - if (av1_is_directional_mode(i)) { - const uint8_t angle_bin = mode_to_angle_bin[i]; - uint64_t score = 2 * hist[angle_bin]; - int weight = 2; - if (angle_bin > 0) { - score += hist[angle_bin - 1]; - ++weight; - } - if (angle_bin < DIRECTIONAL_MODES - 1) { - score += hist[angle_bin + 1]; - ++weight; - } - if (score * ANGLE_SKIP_THRESH < hist_sum * weight) - directional_mode_skip_mask[i] = 1; - } - } -} - -static void highbd_angle_estimation(const 
uint8_t *src8, int src_stride, - int rows, int cols, BLOCK_SIZE bsize, - uint8_t *directional_mode_skip_mask) { - memset(directional_mode_skip_mask, 0, - INTRA_MODES * sizeof(*directional_mode_skip_mask)); - // Check if angle_delta is used - if (!av1_use_angle_delta(bsize)) return; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - uint64_t hist[DIRECTIONAL_MODES]; - memset(hist, 0, DIRECTIONAL_MODES * sizeof(hist[0])); - src += src_stride; - int r, c, dx, dy; - for (r = 1; r < rows; ++r) { - for (c = 1; c < cols; ++c) { - dx = src[c] - src[c - 1]; - dy = src[c] - src[c - src_stride]; - int index; - const int temp = dx * dx + dy * dy; - if (dy == 0) { - index = 2; - } else { - const int sn = (dx > 0) ^ (dy > 0); - dx = abs(dx); - dy = abs(dy); - const int remd = (dx % dy) * 16 / dy; - const int quot = dx / dy; - index = gradient_to_angle_bin[sn][AOMMIN(quot, 6)][AOMMIN(remd, 15)]; - } - hist[index] += temp; - } - src += src_stride; - } - - int i; - uint64_t hist_sum = 0; - for (i = 0; i < DIRECTIONAL_MODES; ++i) hist_sum += hist[i]; - for (i = 0; i < INTRA_MODES; ++i) { - if (av1_is_directional_mode(i)) { - const uint8_t angle_bin = mode_to_angle_bin[i]; - uint64_t score = 2 * hist[angle_bin]; - int weight = 2; - if (angle_bin > 0) { - score += hist[angle_bin - 1]; - ++weight; - } - if (angle_bin < DIRECTIONAL_MODES - 1) { - score += hist[angle_bin + 1]; - ++weight; - } - if (score * ANGLE_SKIP_THRESH < hist_sum * weight) - directional_mode_skip_mask[i] = 1; - } - } -} - -// Given selected prediction mode, search for the best tx type and size. 
-static void intra_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x, - BLOCK_SIZE bsize, const int *bmode_costs, - int64_t *best_rd, int *rate, int *rate_tokenonly, - int64_t *distortion, int *skippable, - MB_MODE_INFO *best_mbmi, PICK_MODE_CONTEXT *ctx) { - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - RD_STATS rd_stats; - super_block_yrd(cpi, x, &rd_stats, bsize, *best_rd); - if (rd_stats.rate == INT_MAX) return; - int this_rate_tokenonly = rd_stats.rate; - if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(mbmi->sb_type)) { - // super_block_yrd above includes the cost of the tx_size in the - // tokenonly rate, but for intra blocks, tx_size is always coded - // (prediction granularity), so we account for it in the full rate, - // not the tokenonly rate. - this_rate_tokenonly -= tx_size_cost(&cpi->common, x, bsize, mbmi->tx_size); - } - const int this_rate = - rd_stats.rate + - intra_mode_info_cost_y(cpi, x, mbmi, bsize, bmode_costs[mbmi->mode]); - const int64_t this_rd = RDCOST(x->rdmult, this_rate, rd_stats.dist); - if (this_rd < *best_rd) { - *best_mbmi = *mbmi; - *best_rd = this_rd; - *rate = this_rate; - *rate_tokenonly = this_rate_tokenonly; - *distortion = rd_stats.dist; - *skippable = rd_stats.skip; - memcpy(ctx->blk_skip, x->blk_skip, - sizeof(x->blk_skip[0]) * ctx->num_4x4_blk); - } -} - -// This function is used only for intra_only frames -static int64_t rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x, - int mi_row, int mi_col, int *rate, - int *rate_tokenonly, int64_t *distortion, - int *skippable, BLOCK_SIZE bsize, - int64_t best_rd, PICK_MODE_CONTEXT *ctx) { - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - assert(!is_inter_block(mbmi)); - int64_t best_model_rd = INT64_MAX; - const int rows = block_size_high[bsize]; - const int cols = block_size_wide[bsize]; - int is_directional_mode; - uint8_t directional_mode_skip_mask[INTRA_MODES]; - const int src_stride = 
x->plane[0].src.stride; - const uint8_t *src = x->plane[0].src.buf; - int beat_best_rd = 0; - const int *bmode_costs; - PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; - const int try_palette = - av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type); - uint8_t *best_palette_color_map = - try_palette ? x->palette_buffer->best_palette_color_map : NULL; - const MB_MODE_INFO *above_mi = xd->above_mbmi; - const MB_MODE_INFO *left_mi = xd->left_mbmi; - const PREDICTION_MODE A = av1_above_block_mode(above_mi); - const PREDICTION_MODE L = av1_left_block_mode(left_mi); - const int above_ctx = intra_mode_context[A]; - const int left_ctx = intra_mode_context[L]; - bmode_costs = x->y_mode_costs[above_ctx][left_ctx]; - - mbmi->angle_delta[PLANE_TYPE_Y] = 0; - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) - highbd_angle_estimation(src, src_stride, rows, cols, bsize, - directional_mode_skip_mask); - else - angle_estimation(src, src_stride, rows, cols, bsize, - directional_mode_skip_mask); - mbmi->filter_intra_mode_info.use_filter_intra = 0; - pmi->palette_size[0] = 0; - - if (cpi->sf.tx_type_search.fast_intra_tx_type_search) - x->use_default_intra_tx_type = 1; - else - x->use_default_intra_tx_type = 0; - - MB_MODE_INFO best_mbmi = *mbmi; - /* Y Search for intra prediction mode */ - for (int mode_idx = INTRA_MODE_START; mode_idx < INTRA_MODE_END; ++mode_idx) { - RD_STATS this_rd_stats; - int this_rate, this_rate_tokenonly, s; - int64_t this_distortion, this_rd, this_model_rd; - mbmi->mode = intra_rd_search_mode_order[mode_idx]; - mbmi->angle_delta[PLANE_TYPE_Y] = 0; - this_model_rd = - intra_model_yrd(cpi, x, bsize, bmode_costs[mbmi->mode], mi_row, mi_col); - if (best_model_rd != INT64_MAX && - this_model_rd > best_model_rd + (best_model_rd >> 1)) - continue; - if (this_model_rd < best_model_rd) best_model_rd = this_model_rd; - is_directional_mode = av1_is_directional_mode(mbmi->mode); - if (is_directional_mode && 
directional_mode_skip_mask[mbmi->mode]) continue; - if (is_directional_mode && av1_use_angle_delta(bsize)) { - this_rd_stats.rate = INT_MAX; - rd_pick_intra_angle_sby(cpi, x, mi_row, mi_col, &this_rate, - &this_rd_stats, bsize, bmode_costs[mbmi->mode], - best_rd, &best_model_rd); - } else { - super_block_yrd(cpi, x, &this_rd_stats, bsize, best_rd); - } - this_rate_tokenonly = this_rd_stats.rate; - this_distortion = this_rd_stats.dist; - s = this_rd_stats.skip; - - if (this_rate_tokenonly == INT_MAX) continue; - - if (!xd->lossless[mbmi->segment_id] && - block_signals_txsize(mbmi->sb_type)) { - // super_block_yrd above includes the cost of the tx_size in the - // tokenonly rate, but for intra blocks, tx_size is always coded - // (prediction granularity), so we account for it in the full rate, - // not the tokenonly rate. - this_rate_tokenonly -= - tx_size_cost(&cpi->common, x, bsize, mbmi->tx_size); - } - this_rate = - this_rd_stats.rate + - intra_mode_info_cost_y(cpi, x, mbmi, bsize, bmode_costs[mbmi->mode]); - this_rd = RDCOST(x->rdmult, this_rate, this_distortion); - if (this_rd < best_rd) { - best_mbmi = *mbmi; - best_rd = this_rd; - beat_best_rd = 1; - *rate = this_rate; - *rate_tokenonly = this_rate_tokenonly; - *distortion = this_distortion; - *skippable = s; - memcpy(ctx->blk_skip, x->blk_skip, - sizeof(x->blk_skip[0]) * ctx->num_4x4_blk); - } - } - - if (try_palette) { - rd_pick_palette_intra_sby( - cpi, x, bsize, mi_row, mi_col, bmode_costs[DC_PRED], &best_mbmi, - best_palette_color_map, &best_rd, &best_model_rd, rate, rate_tokenonly, - distortion, skippable, ctx, ctx->blk_skip); - } - - if (beat_best_rd && av1_filter_intra_allowed_bsize(&cpi->common, bsize)) { - if (rd_pick_filter_intra_sby( - cpi, x, mi_row, mi_col, rate, rate_tokenonly, distortion, skippable, - bsize, bmode_costs[DC_PRED], &best_rd, &best_model_rd, ctx)) { - best_mbmi = *mbmi; - } - } - - // If previous searches use only the default tx type, do an extra search for - // the best tx type. 
- if (x->use_default_intra_tx_type) { - *mbmi = best_mbmi; - x->use_default_intra_tx_type = 0; - intra_block_yrd(cpi, x, bsize, bmode_costs, &best_rd, rate, rate_tokenonly, - distortion, skippable, &best_mbmi, ctx); - } - - *mbmi = best_mbmi; - return best_rd; -} - -// Return value 0: early termination triggered, no valid rd cost available; -// 1: rd cost values are valid. -static int super_block_uvrd(const AV1_COMP *const cpi, MACROBLOCK *x, - RD_STATS *rd_stats, BLOCK_SIZE bsize, - int64_t ref_best_rd) { - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - struct macroblockd_plane *const pd = &xd->plane[AOM_PLANE_U]; - const TX_SIZE uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd); - int plane; - int is_cost_valid = 1; - av1_init_rd_stats(rd_stats); - - if (ref_best_rd < 0) is_cost_valid = 0; - - if (x->skip_chroma_rd) return is_cost_valid; - - bsize = scale_chroma_bsize(bsize, pd->subsampling_x, pd->subsampling_y); - - if (is_inter_block(mbmi) && is_cost_valid) { - for (plane = 1; plane < MAX_MB_PLANE; ++plane) - av1_subtract_plane(x, bsize, plane); - } - - if (is_cost_valid) { - for (plane = 1; plane < MAX_MB_PLANE; ++plane) { - RD_STATS pn_rd_stats; - txfm_rd_in_plane(x, cpi, &pn_rd_stats, ref_best_rd, plane, bsize, - uv_tx_size, cpi->sf.use_fast_coef_costing, FTXS_NONE); - if (pn_rd_stats.rate == INT_MAX) { - is_cost_valid = 0; - break; - } - av1_merge_rd_stats(rd_stats, &pn_rd_stats); - if (RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) > ref_best_rd && - RDCOST(x->rdmult, 0, rd_stats->sse) > ref_best_rd) { - is_cost_valid = 0; - break; - } - } - } - - if (!is_cost_valid) { - // reset cost value - av1_invalid_rd_stats(rd_stats); - } - - return is_cost_valid; -} - -static void tx_block_rd_b(const AV1_COMP *cpi, MACROBLOCK *x, TX_SIZE tx_size, - int blk_row, int blk_col, int plane, int block, - int plane_bsize, TXB_CTX *txb_ctx, RD_STATS *rd_stats, - FAST_TX_SEARCH_MODE ftxs_mode, int64_t ref_rdcost, - TXB_RD_INFO *rd_info_array) { - 
const struct macroblock_plane *const p = &x->plane[plane]; - const uint16_t cur_joint_ctx = - (txb_ctx->dc_sign_ctx << 8) + txb_ctx->txb_skip_ctx; - const int txk_type_idx = - av1_get_txk_type_index(plane_bsize, blk_row, blk_col); - // Look up RD and terminate early in case when we've already processed exactly - // the same residual with exactly the same entropy context. - if (rd_info_array != NULL && rd_info_array->valid && - rd_info_array->entropy_context == cur_joint_ctx) { - if (plane == 0) - x->e_mbd.mi[0]->txk_type[txk_type_idx] = rd_info_array->tx_type; - const TX_TYPE ref_tx_type = - av1_get_tx_type(get_plane_type(plane), &x->e_mbd, blk_row, blk_col, - tx_size, cpi->common.reduced_tx_set_used); - if (ref_tx_type == rd_info_array->tx_type) { - rd_stats->rate += rd_info_array->rate; - rd_stats->dist += rd_info_array->dist; - rd_stats->sse += rd_info_array->sse; - rd_stats->skip &= rd_info_array->eob == 0; - p->eobs[block] = rd_info_array->eob; - p->txb_entropy_ctx[block] = rd_info_array->txb_entropy_ctx; - return; - } - } - - RD_STATS this_rd_stats; - search_txk_type(cpi, x, plane, block, blk_row, blk_col, plane_bsize, tx_size, - txb_ctx, ftxs_mode, 0, ref_rdcost, &this_rd_stats); - - av1_merge_rd_stats(rd_stats, &this_rd_stats); - - // Save RD results for possible reuse in future. 
- if (rd_info_array != NULL) { - rd_info_array->valid = 1; - rd_info_array->entropy_context = cur_joint_ctx; - rd_info_array->rate = this_rd_stats.rate; - rd_info_array->dist = this_rd_stats.dist; - rd_info_array->sse = this_rd_stats.sse; - rd_info_array->eob = p->eobs[block]; - rd_info_array->txb_entropy_ctx = p->txb_entropy_ctx[block]; - if (plane == 0) { - rd_info_array->tx_type = x->e_mbd.mi[0]->txk_type[txk_type_idx]; - } - } -} - -static void get_mean_and_dev(const int16_t *data, int stride, int bw, int bh, - float *mean, float *dev) { - int x_sum = 0; - uint64_t x2_sum = 0; - for (int i = 0; i < bh; ++i) { - for (int j = 0; j < bw; ++j) { - const int val = data[j]; - x_sum += val; - x2_sum += val * val; - } - data += stride; - } - - const int num = bw * bh; - const float e_x = (float)x_sum / num; - const float e_x2 = (float)((double)x2_sum / num); - const float diff = e_x2 - e_x * e_x; - *dev = (diff > 0) ? sqrtf(diff) : 0; - *mean = e_x; -} - -static void get_mean_and_dev_float(const float *data, int stride, int bw, - int bh, float *mean, float *dev) { - float x_sum = 0; - float x2_sum = 0; - for (int i = 0; i < bh; ++i) { - for (int j = 0; j < bw; ++j) { - const float val = data[j]; - x_sum += val; - x2_sum += val * val; - } - data += stride; - } - - const int num = bw * bh; - const float e_x = x_sum / num; - const float e_x2 = x2_sum / num; - const float diff = e_x2 - e_x * e_x; - *dev = (diff > 0) ? sqrtf(diff) : 0; - *mean = e_x; -} - -// Feature used by the model to predict tx split: the mean and standard -// deviation values of the block and sub-blocks. 
-static void get_mean_dev_features(const int16_t *data, int stride, int bw, - int bh, int levels, float *feature) { - int feature_idx = 0; - int width = bw; - int height = bh; - const int16_t *const data_ptr = &data[0]; - for (int lv = 0; lv < levels; ++lv) { - if (width < 2 || height < 2) break; - float mean_buf[16]; - float dev_buf[16]; - int blk_idx = 0; - for (int row = 0; row < bh; row += height) { - for (int col = 0; col < bw; col += width) { - float mean, dev; - get_mean_and_dev(data_ptr + row * stride + col, stride, width, height, - &mean, &dev); - feature[feature_idx++] = mean; - feature[feature_idx++] = dev; - mean_buf[blk_idx] = mean; - dev_buf[blk_idx++] = dev; - } - } - if (blk_idx > 1) { - float mean, dev; - // Deviation of means. - get_mean_and_dev_float(mean_buf, 1, 1, blk_idx, &mean, &dev); - feature[feature_idx++] = dev; - // Mean of deviations. - get_mean_and_dev_float(dev_buf, 1, 1, blk_idx, &mean, &dev); - feature[feature_idx++] = mean; - } - // Reduce the block size when proceeding to the next level. 
- if (height == width) { - height = height >> 1; - width = width >> 1; - } else if (height > width) { - height = height >> 1; - } else { - width = width >> 1; - } - } -} - -static int ml_predict_tx_split(MACROBLOCK *x, BLOCK_SIZE bsize, int blk_row, - int blk_col, TX_SIZE tx_size) { - const NN_CONFIG *nn_config = av1_tx_split_nnconfig_map[tx_size]; - if (!nn_config) return -1; - - const int diff_stride = block_size_wide[bsize]; - const int16_t *diff = - x->plane[0].src_diff + 4 * blk_row * diff_stride + 4 * blk_col; - const int bw = tx_size_wide[tx_size]; - const int bh = tx_size_high[tx_size]; - aom_clear_system_state(); - - float features[64] = { 0.0f }; - get_mean_dev_features(diff, diff_stride, bw, bh, 2, features); - - float score = 0.0f; - av1_nn_predict(features, nn_config, &score); - if (score > 8.0f) return 100; - if (score < -8.0f) return 0; - score = 1.0f / (1.0f + (float)exp(-score)); - return (int)(score * 100); -} - -typedef struct { - int64_t rd; - int txb_entropy_ctx; - TX_TYPE tx_type; -} TxCandidateInfo; - -static void try_tx_block_no_split( - const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, int blk_col, int block, - TX_SIZE tx_size, int depth, BLOCK_SIZE plane_bsize, - const ENTROPY_CONTEXT *ta, const ENTROPY_CONTEXT *tl, - int txfm_partition_ctx, RD_STATS *rd_stats, int64_t ref_best_rd, - FAST_TX_SEARCH_MODE ftxs_mode, TXB_RD_INFO_NODE *rd_info_node, - TxCandidateInfo *no_split) { - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - struct macroblock_plane *const p = &x->plane[0]; - const int bw = block_size_wide[plane_bsize] >> tx_size_wide_log2[0]; - - no_split->rd = INT64_MAX; - no_split->txb_entropy_ctx = 0; - no_split->tx_type = TX_TYPES; - - const ENTROPY_CONTEXT *const pta = ta + blk_col; - const ENTROPY_CONTEXT *const ptl = tl + blk_row; - - const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size); - TXB_CTX txb_ctx; - get_txb_ctx(plane_bsize, tx_size, 0, pta, ptl, &txb_ctx); - const int zero_blk_rate = 
x->coeff_costs[txs_ctx][PLANE_TYPE_Y] - .txb_skip_cost[txb_ctx.txb_skip_ctx][1]; - - rd_stats->ref_rdcost = ref_best_rd; - rd_stats->zero_rate = zero_blk_rate; - const int index = av1_get_txb_size_index(plane_bsize, blk_row, blk_col); - mbmi->inter_tx_size[index] = tx_size; - tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, 0, block, plane_bsize, - &txb_ctx, rd_stats, ftxs_mode, ref_best_rd, - rd_info_node != NULL ? rd_info_node->rd_info_array : NULL); - assert(rd_stats->rate < INT_MAX); - - if ((RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) >= - RDCOST(x->rdmult, zero_blk_rate, rd_stats->sse) || - rd_stats->skip == 1) && - !xd->lossless[mbmi->segment_id]) { -#if CONFIG_RD_DEBUG - av1_update_txb_coeff_cost(rd_stats, plane, tx_size, blk_row, blk_col, - zero_blk_rate - rd_stats->rate); -#endif // CONFIG_RD_DEBUG - rd_stats->rate = zero_blk_rate; - rd_stats->dist = rd_stats->sse; - rd_stats->skip = 1; - set_blk_skip(x, 0, blk_row * bw + blk_col, 1); - p->eobs[block] = 0; - update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size, - DCT_DCT); - } else { - set_blk_skip(x, 0, blk_row * bw + blk_col, 0); - rd_stats->skip = 0; - } - - if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH) - rd_stats->rate += x->txfm_partition_cost[txfm_partition_ctx][0]; - - no_split->rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); - no_split->txb_entropy_ctx = p->txb_entropy_ctx[block]; - const int txk_type_idx = - av1_get_txk_type_index(plane_bsize, blk_row, blk_col); - no_split->tx_type = mbmi->txk_type[txk_type_idx]; -} - -static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, - int blk_col, int block, TX_SIZE tx_size, int depth, - BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *ta, - ENTROPY_CONTEXT *tl, TXFM_CONTEXT *tx_above, - TXFM_CONTEXT *tx_left, RD_STATS *rd_stats, - int64_t ref_best_rd, int *is_cost_valid, - FAST_TX_SEARCH_MODE ftxs_mode, - TXB_RD_INFO_NODE *rd_info_node); - -static void try_tx_block_split( - const AV1_COMP *cpi, MACROBLOCK 
*x, int blk_row, int blk_col, int block, - TX_SIZE tx_size, int depth, BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *ta, - ENTROPY_CONTEXT *tl, TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left, - int txfm_partition_ctx, int64_t no_split_rd, int64_t ref_best_rd, - FAST_TX_SEARCH_MODE ftxs_mode, TXB_RD_INFO_NODE *rd_info_node, - RD_STATS *split_rd_stats, int64_t *split_rd) { - MACROBLOCKD *const xd = &x->e_mbd; - const int max_blocks_high = max_block_high(xd, plane_bsize, 0); - const int max_blocks_wide = max_block_wide(xd, plane_bsize, 0); - const TX_SIZE sub_txs = sub_tx_size_map[tx_size]; - const int bsw = tx_size_wide_unit[sub_txs]; - const int bsh = tx_size_high_unit[sub_txs]; - const int sub_step = bsw * bsh; - RD_STATS this_rd_stats; - int this_cost_valid = 1; - int64_t tmp_rd = 0; - - split_rd_stats->rate = x->txfm_partition_cost[txfm_partition_ctx][1]; - - assert(tx_size < TX_SIZES_ALL); - - int blk_idx = 0; - for (int r = 0; r < tx_size_high_unit[tx_size]; r += bsh) { - for (int c = 0; c < tx_size_wide_unit[tx_size]; c += bsw, ++blk_idx) { - const int offsetr = blk_row + r; - const int offsetc = blk_col + c; - if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue; - assert(blk_idx < 4); - select_tx_block( - cpi, x, offsetr, offsetc, block, sub_txs, depth + 1, plane_bsize, ta, - tl, tx_above, tx_left, &this_rd_stats, ref_best_rd - tmp_rd, - &this_cost_valid, ftxs_mode, - (rd_info_node != NULL) ? rd_info_node->children[blk_idx] : NULL); - - if (!this_cost_valid) goto LOOP_EXIT; - - av1_merge_rd_stats(split_rd_stats, &this_rd_stats); - - tmp_rd = RDCOST(x->rdmult, split_rd_stats->rate, split_rd_stats->dist); - - if (no_split_rd < tmp_rd) { - this_cost_valid = 0; - goto LOOP_EXIT; - } - block += sub_step; - } - } - -LOOP_EXIT : {} - - if (this_cost_valid) *split_rd = tmp_rd; -} - -// Search for the best tx partition/type for a given luma block. 
-static void select_tx_block(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, - int blk_col, int block, TX_SIZE tx_size, int depth, - BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *ta, - ENTROPY_CONTEXT *tl, TXFM_CONTEXT *tx_above, - TXFM_CONTEXT *tx_left, RD_STATS *rd_stats, - int64_t ref_best_rd, int *is_cost_valid, - FAST_TX_SEARCH_MODE ftxs_mode, - TXB_RD_INFO_NODE *rd_info_node) { - assert(tx_size < TX_SIZES_ALL); - av1_init_rd_stats(rd_stats); - if (ref_best_rd < 0) { - *is_cost_valid = 0; - return; - } - - MACROBLOCKD *const xd = &x->e_mbd; - const int max_blocks_high = max_block_high(xd, plane_bsize, 0); - const int max_blocks_wide = max_block_wide(xd, plane_bsize, 0); - if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return; - - const int bw = block_size_wide[plane_bsize] >> tx_size_wide_log2[0]; - MB_MODE_INFO *const mbmi = xd->mi[0]; - const int ctx = txfm_partition_context(tx_above + blk_col, tx_left + blk_row, - mbmi->sb_type, tx_size); - struct macroblock_plane *const p = &x->plane[0]; - - const int try_no_split = 1; - int try_split = tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH; -#if CONFIG_DIST_8X8 - if (x->using_dist_8x8) - try_split &= tx_size_wide[tx_size] >= 16 && tx_size_high[tx_size] >= 16; -#endif - TxCandidateInfo no_split = { INT64_MAX, 0, TX_TYPES }; - - // TX no split - if (try_no_split) { - try_tx_block_no_split(cpi, x, blk_row, blk_col, block, tx_size, depth, - plane_bsize, ta, tl, ctx, rd_stats, ref_best_rd, - ftxs_mode, rd_info_node, &no_split); - - if (cpi->sf.adaptive_txb_search_level && - (no_split.rd - - (no_split.rd >> (1 + cpi->sf.adaptive_txb_search_level))) > - ref_best_rd) { - *is_cost_valid = 0; - return; - } - - if (cpi->sf.txb_split_cap) { - if (p->eobs[block] == 0) try_split = 0; - } - } - - if (x->e_mbd.bd == 8 && !x->cb_partition_scan && try_split) { - const int threshold = cpi->sf.tx_type_search.ml_tx_split_thresh; - if (threshold >= 0) { - const int split_score = - ml_predict_tx_split(x, plane_bsize, blk_row, 
blk_col, tx_size); - if (split_score >= 0 && split_score < threshold) try_split = 0; - } - } - - // TX split - int64_t split_rd = INT64_MAX; - RD_STATS split_rd_stats; - av1_init_rd_stats(&split_rd_stats); - if (try_split) { - try_tx_block_split(cpi, x, blk_row, blk_col, block, tx_size, depth, - plane_bsize, ta, tl, tx_above, tx_left, ctx, no_split.rd, - AOMMIN(no_split.rd, ref_best_rd), ftxs_mode, - rd_info_node, &split_rd_stats, &split_rd); - } - - if (no_split.rd < split_rd) { - ENTROPY_CONTEXT *pta = ta + blk_col; - ENTROPY_CONTEXT *ptl = tl + blk_row; - const TX_SIZE tx_size_selected = tx_size; - p->txb_entropy_ctx[block] = no_split.txb_entropy_ctx; - av1_set_txb_context(x, 0, block, tx_size_selected, pta, ptl); - txfm_partition_update(tx_above + blk_col, tx_left + blk_row, tx_size, - tx_size); - for (int idy = 0; idy < tx_size_high_unit[tx_size]; ++idy) { - for (int idx = 0; idx < tx_size_wide_unit[tx_size]; ++idx) { - const int index = - av1_get_txb_size_index(plane_bsize, blk_row + idy, blk_col + idx); - mbmi->inter_tx_size[index] = tx_size_selected; - } - } - mbmi->tx_size = tx_size_selected; - update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size, - no_split.tx_type); - set_blk_skip(x, 0, blk_row * bw + blk_col, rd_stats->skip); - } else { - *rd_stats = split_rd_stats; - if (split_rd == INT64_MAX) *is_cost_valid = 0; - } -} - -static void select_inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x, - RD_STATS *rd_stats, BLOCK_SIZE bsize, - int64_t ref_best_rd, - FAST_TX_SEARCH_MODE ftxs_mode, - TXB_RD_INFO_NODE *rd_info_tree) { - MACROBLOCKD *const xd = &x->e_mbd; - int is_cost_valid = 1; - int64_t this_rd = 0, skip_rd = 0; - - if (ref_best_rd < 0) is_cost_valid = 0; - - av1_init_rd_stats(rd_stats); - - if (is_cost_valid) { - const struct macroblockd_plane *const pd = &xd->plane[0]; - const BLOCK_SIZE plane_bsize = - get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); - const int mi_width = mi_size_wide[plane_bsize]; - const 
int mi_height = mi_size_high[plane_bsize]; - const TX_SIZE max_tx_size = max_txsize_rect_lookup[plane_bsize]; - const int bh = tx_size_high_unit[max_tx_size]; - const int bw = tx_size_wide_unit[max_tx_size]; - int idx, idy; - int block = 0; - int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size]; - ENTROPY_CONTEXT ctxa[MAX_MIB_SIZE]; - ENTROPY_CONTEXT ctxl[MAX_MIB_SIZE]; - TXFM_CONTEXT tx_above[MAX_MIB_SIZE]; - TXFM_CONTEXT tx_left[MAX_MIB_SIZE]; - - RD_STATS pn_rd_stats; - const int init_depth = - get_search_init_depth(mi_width, mi_height, 1, &cpi->sf); - av1_init_rd_stats(&pn_rd_stats); - - av1_get_entropy_contexts(bsize, pd, ctxa, ctxl); - memcpy(tx_above, xd->above_txfm_context, sizeof(TXFM_CONTEXT) * mi_width); - memcpy(tx_left, xd->left_txfm_context, sizeof(TXFM_CONTEXT) * mi_height); - const int skip_ctx = av1_get_skip_context(xd); - const int s0 = x->skip_cost[skip_ctx][0]; - const int s1 = x->skip_cost[skip_ctx][1]; - - skip_rd = RDCOST(x->rdmult, s1, 0); - this_rd = RDCOST(x->rdmult, s0, 0); - for (idy = 0; idy < mi_height; idy += bh) { - for (idx = 0; idx < mi_width; idx += bw) { - int64_t best_rd_sofar = (ref_best_rd - (AOMMIN(skip_rd, this_rd))); - select_tx_block(cpi, x, idy, idx, block, max_tx_size, init_depth, - plane_bsize, ctxa, ctxl, tx_above, tx_left, - &pn_rd_stats, best_rd_sofar, &is_cost_valid, ftxs_mode, - rd_info_tree); - if (!is_cost_valid || pn_rd_stats.rate == INT_MAX) { - av1_invalid_rd_stats(rd_stats); - return; - } - av1_merge_rd_stats(rd_stats, &pn_rd_stats); - skip_rd = RDCOST(x->rdmult, s1, rd_stats->sse); - this_rd = RDCOST(x->rdmult, rd_stats->rate + s0, rd_stats->dist); - block += step; - if (rd_info_tree != NULL) rd_info_tree += 1; - } - } - if (skip_rd <= this_rd) { - rd_stats->rate = 0; - rd_stats->dist = rd_stats->sse; - rd_stats->skip = 1; - } else { - rd_stats->skip = 0; - } - } - - if (!is_cost_valid) { - // reset cost value - av1_invalid_rd_stats(rd_stats); - } -} - -static int64_t 
select_tx_size_fix_type(const AV1_COMP *cpi, MACROBLOCK *x, - RD_STATS *rd_stats, BLOCK_SIZE bsize, - int64_t ref_best_rd, - TXB_RD_INFO_NODE *rd_info_tree) { - const int fast_tx_search = cpi->sf.tx_size_search_method > USE_FULL_RD; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - const int is_inter = is_inter_block(mbmi); - const int skip_ctx = av1_get_skip_context(xd); - int s0 = x->skip_cost[skip_ctx][0]; - int s1 = x->skip_cost[skip_ctx][1]; - int64_t rd; - - // TODO(debargha): enable this as a speed feature where the - // select_inter_block_yrd() function above will use a simplified search - // such as not using full optimize, but the inter_block_yrd() function - // will use more complex search given that the transform partitions have - // already been decided. - - int64_t rd_thresh = ref_best_rd; - if (fast_tx_search && rd_thresh < INT64_MAX) { - if (INT64_MAX - rd_thresh > (rd_thresh >> 3)) rd_thresh += (rd_thresh >> 3); - } - assert(rd_thresh > 0); - - FAST_TX_SEARCH_MODE ftxs_mode = - fast_tx_search ? FTXS_DCT_AND_1D_DCT_ONLY : FTXS_NONE; - select_inter_block_yrd(cpi, x, rd_stats, bsize, rd_thresh, ftxs_mode, - rd_info_tree); - if (rd_stats->rate == INT_MAX) return INT64_MAX; - - // If fast_tx_search is true, only DCT and 1D DCT were tested in - // select_inter_block_yrd() above. Do a better search for tx type with - // tx sizes already decided. 
- if (fast_tx_search) { - if (!inter_block_yrd(cpi, x, rd_stats, bsize, ref_best_rd, FTXS_NONE)) - return INT64_MAX; - } - - if (rd_stats->skip) - rd = RDCOST(x->rdmult, s1, rd_stats->sse); - else - rd = RDCOST(x->rdmult, rd_stats->rate + s0, rd_stats->dist); - - if (is_inter && !xd->lossless[xd->mi[0]->segment_id] && !(rd_stats->skip)) - rd = AOMMIN(rd, RDCOST(x->rdmult, s1, rd_stats->sse)); - - return rd; -} - -// Finds rd cost for a y block, given the transform size partitions -static void tx_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, - int blk_col, int block, TX_SIZE tx_size, - BLOCK_SIZE plane_bsize, int depth, - ENTROPY_CONTEXT *above_ctx, ENTROPY_CONTEXT *left_ctx, - TXFM_CONTEXT *tx_above, TXFM_CONTEXT *tx_left, - int64_t ref_best_rd, RD_STATS *rd_stats, - FAST_TX_SEARCH_MODE ftxs_mode) { - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - const int max_blocks_high = max_block_high(xd, plane_bsize, 0); - const int max_blocks_wide = max_block_wide(xd, plane_bsize, 0); - - assert(tx_size < TX_SIZES_ALL); - - if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return; - - const TX_SIZE plane_tx_size = mbmi->inter_tx_size[av1_get_txb_size_index( - plane_bsize, blk_row, blk_col)]; - - int ctx = txfm_partition_context(tx_above + blk_col, tx_left + blk_row, - mbmi->sb_type, tx_size); - - av1_init_rd_stats(rd_stats); - if (tx_size == plane_tx_size) { - ENTROPY_CONTEXT *ta = above_ctx + blk_col; - ENTROPY_CONTEXT *tl = left_ctx + blk_row; - const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size); - TXB_CTX txb_ctx; - get_txb_ctx(plane_bsize, tx_size, 0, ta, tl, &txb_ctx); - - const int zero_blk_rate = x->coeff_costs[txs_ctx][get_plane_type(0)] - .txb_skip_cost[txb_ctx.txb_skip_ctx][1]; - rd_stats->zero_rate = zero_blk_rate; - rd_stats->ref_rdcost = ref_best_rd; - tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, 0, block, plane_bsize, - &txb_ctx, rd_stats, ftxs_mode, ref_best_rd, NULL); - const int mi_width = 
block_size_wide[plane_bsize] >> tx_size_wide_log2[0]; - if (RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) >= - RDCOST(x->rdmult, zero_blk_rate, rd_stats->sse) || - rd_stats->skip == 1) { - rd_stats->rate = zero_blk_rate; - rd_stats->dist = rd_stats->sse; - rd_stats->skip = 1; - set_blk_skip(x, 0, blk_row * mi_width + blk_col, 1); - x->plane[0].eobs[block] = 0; - x->plane[0].txb_entropy_ctx[block] = 0; - update_txk_array(mbmi->txk_type, plane_bsize, blk_row, blk_col, tx_size, - DCT_DCT); - } else { - rd_stats->skip = 0; - set_blk_skip(x, 0, blk_row * mi_width + blk_col, 0); - } - if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH) - rd_stats->rate += x->txfm_partition_cost[ctx][0]; - av1_set_txb_context(x, 0, block, tx_size, ta, tl); - txfm_partition_update(tx_above + blk_col, tx_left + blk_row, tx_size, - tx_size); - } else { - const TX_SIZE sub_txs = sub_tx_size_map[tx_size]; - const int bsw = tx_size_wide_unit[sub_txs]; - const int bsh = tx_size_high_unit[sub_txs]; - const int step = bsh * bsw; - RD_STATS pn_rd_stats; - int64_t this_rd = 0; - assert(bsw > 0 && bsh > 0); - - for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) { - for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) { - const int offsetr = blk_row + row; - const int offsetc = blk_col + col; - - if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue; - - av1_init_rd_stats(&pn_rd_stats); - tx_block_yrd(cpi, x, offsetr, offsetc, block, sub_txs, plane_bsize, - depth + 1, above_ctx, left_ctx, tx_above, tx_left, - ref_best_rd - this_rd, &pn_rd_stats, ftxs_mode); - if (pn_rd_stats.rate == INT_MAX) { - av1_invalid_rd_stats(rd_stats); - return; - } - av1_merge_rd_stats(rd_stats, &pn_rd_stats); - this_rd += RDCOST(x->rdmult, pn_rd_stats.rate, pn_rd_stats.dist); - block += step; - } - } - - if (tx_size > TX_4X4 && depth < MAX_VARTX_DEPTH) - rd_stats->rate += x->txfm_partition_cost[ctx][1]; - } -} - -// Return value 0: early termination triggered, no valid rd cost 
available; -// 1: rd cost values are valid. -static int inter_block_yrd(const AV1_COMP *cpi, MACROBLOCK *x, - RD_STATS *rd_stats, BLOCK_SIZE bsize, - int64_t ref_best_rd, FAST_TX_SEARCH_MODE ftxs_mode) { - MACROBLOCKD *const xd = &x->e_mbd; - int is_cost_valid = 1; - int64_t this_rd = 0; - - if (ref_best_rd < 0) is_cost_valid = 0; - - av1_init_rd_stats(rd_stats); - - if (is_cost_valid) { - const struct macroblockd_plane *const pd = &xd->plane[0]; - const BLOCK_SIZE plane_bsize = - get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); - const int mi_width = mi_size_wide[plane_bsize]; - const int mi_height = mi_size_high[plane_bsize]; - const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, 0); - const int bh = tx_size_high_unit[max_tx_size]; - const int bw = tx_size_wide_unit[max_tx_size]; - const int init_depth = - get_search_init_depth(mi_width, mi_height, 1, &cpi->sf); - int idx, idy; - int block = 0; - int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size]; - ENTROPY_CONTEXT ctxa[MAX_MIB_SIZE]; - ENTROPY_CONTEXT ctxl[MAX_MIB_SIZE]; - TXFM_CONTEXT tx_above[MAX_MIB_SIZE]; - TXFM_CONTEXT tx_left[MAX_MIB_SIZE]; - RD_STATS pn_rd_stats; - - av1_get_entropy_contexts(bsize, pd, ctxa, ctxl); - memcpy(tx_above, xd->above_txfm_context, sizeof(TXFM_CONTEXT) * mi_width); - memcpy(tx_left, xd->left_txfm_context, sizeof(TXFM_CONTEXT) * mi_height); - - for (idy = 0; idy < mi_height; idy += bh) { - for (idx = 0; idx < mi_width; idx += bw) { - av1_init_rd_stats(&pn_rd_stats); - tx_block_yrd(cpi, x, idy, idx, block, max_tx_size, plane_bsize, - init_depth, ctxa, ctxl, tx_above, tx_left, - ref_best_rd - this_rd, &pn_rd_stats, ftxs_mode); - if (pn_rd_stats.rate == INT_MAX) { - av1_invalid_rd_stats(rd_stats); - return 0; - } - av1_merge_rd_stats(rd_stats, &pn_rd_stats); - this_rd += - AOMMIN(RDCOST(x->rdmult, pn_rd_stats.rate, pn_rd_stats.dist), - RDCOST(x->rdmult, pn_rd_stats.zero_rate, pn_rd_stats.sse)); - block += step; - } - } - } - - 
const int skip_ctx = av1_get_skip_context(xd); - const int s0 = x->skip_cost[skip_ctx][0]; - const int s1 = x->skip_cost[skip_ctx][1]; - int64_t skip_rd = RDCOST(x->rdmult, s1, rd_stats->sse); - this_rd = RDCOST(x->rdmult, rd_stats->rate + s0, rd_stats->dist); - if (skip_rd < this_rd) { - this_rd = skip_rd; - rd_stats->rate = 0; - rd_stats->dist = rd_stats->sse; - rd_stats->skip = 1; - } - if (this_rd > ref_best_rd) is_cost_valid = 0; - - if (!is_cost_valid) { - // reset cost value - av1_invalid_rd_stats(rd_stats); - } - return is_cost_valid; -} - -static INLINE uint32_t get_block_residue_hash(MACROBLOCK *x, BLOCK_SIZE bsize) { - const int rows = block_size_high[bsize]; - const int cols = block_size_wide[bsize]; - const int16_t *diff = x->plane[0].src_diff; - const uint32_t hash = av1_get_crc32c_value(&x->mb_rd_record.crc_calculator, - (uint8_t *)diff, 2 * rows * cols); - return (hash << 5) + bsize; -} - -static void save_tx_rd_info(int n4, uint32_t hash, const MACROBLOCK *const x, - const RD_STATS *const rd_stats, - MB_RD_RECORD *tx_rd_record) { - int index; - if (tx_rd_record->num < RD_RECORD_BUFFER_LEN) { - index = - (tx_rd_record->index_start + tx_rd_record->num) % RD_RECORD_BUFFER_LEN; - ++tx_rd_record->num; - } else { - index = tx_rd_record->index_start; - tx_rd_record->index_start = - (tx_rd_record->index_start + 1) % RD_RECORD_BUFFER_LEN; - } - MB_RD_INFO *const tx_rd_info = &tx_rd_record->tx_rd_info[index]; - const MACROBLOCKD *const xd = &x->e_mbd; - const MB_MODE_INFO *const mbmi = xd->mi[0]; - tx_rd_info->hash_value = hash; - tx_rd_info->tx_size = mbmi->tx_size; - memcpy(tx_rd_info->blk_skip, x->blk_skip, - sizeof(tx_rd_info->blk_skip[0]) * n4); - av1_copy(tx_rd_info->inter_tx_size, mbmi->inter_tx_size); - av1_copy(tx_rd_info->txk_type, mbmi->txk_type); - tx_rd_info->rd_stats = *rd_stats; -} - -static void fetch_tx_rd_info(int n4, const MB_RD_INFO *const tx_rd_info, - RD_STATS *const rd_stats, MACROBLOCK *const x) { - MACROBLOCKD *const xd = &x->e_mbd; 
- MB_MODE_INFO *const mbmi = xd->mi[0]; - mbmi->tx_size = tx_rd_info->tx_size; - memcpy(x->blk_skip, tx_rd_info->blk_skip, - sizeof(tx_rd_info->blk_skip[0]) * n4); - av1_copy(mbmi->inter_tx_size, tx_rd_info->inter_tx_size); - av1_copy(mbmi->txk_type, tx_rd_info->txk_type); - *rd_stats = tx_rd_info->rd_stats; -} - -static int find_tx_size_rd_info(TXB_RD_RECORD *cur_record, - const uint32_t hash) { - // Linear search through the circular buffer to find matching hash. - for (int i = cur_record->index_start - 1; i >= 0; i--) { - if (cur_record->hash_vals[i] == hash) return i; - } - for (int i = cur_record->num - 1; i >= cur_record->index_start; i--) { - if (cur_record->hash_vals[i] == hash) return i; - } - int index; - // If not found - add new RD info into the buffer and return its index - if (cur_record->num < TX_SIZE_RD_RECORD_BUFFER_LEN) { - index = (cur_record->index_start + cur_record->num) % - TX_SIZE_RD_RECORD_BUFFER_LEN; - cur_record->num++; - } else { - index = cur_record->index_start; - cur_record->index_start = - (cur_record->index_start + 1) % TX_SIZE_RD_RECORD_BUFFER_LEN; - } - - cur_record->hash_vals[index] = hash; - av1_zero(cur_record->tx_rd_info[index]); - return index; -} - -typedef struct { - int leaf; - int8_t children[4]; -} RD_RECORD_IDX_NODE; - -static const RD_RECORD_IDX_NODE rd_record_tree_8x8[] = { - { 1, { 0 } }, -}; - -static const RD_RECORD_IDX_NODE rd_record_tree_8x16[] = { - { 0, { 1, 2, -1, -1 } }, - { 1, { 0, 0, 0, 0 } }, - { 1, { 0, 0, 0, 0 } }, -}; - -static const RD_RECORD_IDX_NODE rd_record_tree_16x8[] = { - { 0, { 1, 2, -1, -1 } }, - { 1, { 0 } }, - { 1, { 0 } }, -}; - -static const RD_RECORD_IDX_NODE rd_record_tree_16x16[] = { - { 0, { 1, 2, 3, 4 } }, { 1, { 0 } }, { 1, { 0 } }, { 1, { 0 } }, { 1, { 0 } }, -}; - -static const RD_RECORD_IDX_NODE rd_record_tree_1_2[] = { - { 0, { 1, 2, -1, -1 } }, - { 0, { 3, 4, 5, 6 } }, - { 0, { 7, 8, 9, 10 } }, -}; - -static const RD_RECORD_IDX_NODE rd_record_tree_2_1[] = { - { 0, { 1, 2, -1, -1 
} }, - { 0, { 3, 4, 7, 8 } }, - { 0, { 5, 6, 9, 10 } }, -}; - -static const RD_RECORD_IDX_NODE rd_record_tree_sqr[] = { - { 0, { 1, 2, 3, 4 } }, { 0, { 5, 6, 9, 10 } }, { 0, { 7, 8, 11, 12 } }, - { 0, { 13, 14, 17, 18 } }, { 0, { 15, 16, 19, 20 } }, -}; - -static const RD_RECORD_IDX_NODE rd_record_tree_64x128[] = { - { 0, { 2, 3, 4, 5 } }, { 0, { 6, 7, 8, 9 } }, - { 0, { 10, 11, 14, 15 } }, { 0, { 12, 13, 16, 17 } }, - { 0, { 18, 19, 22, 23 } }, { 0, { 20, 21, 24, 25 } }, - { 0, { 26, 27, 30, 31 } }, { 0, { 28, 29, 32, 33 } }, - { 0, { 34, 35, 38, 39 } }, { 0, { 36, 37, 40, 41 } }, -}; - -static const RD_RECORD_IDX_NODE rd_record_tree_128x64[] = { - { 0, { 2, 3, 6, 7 } }, { 0, { 4, 5, 8, 9 } }, - { 0, { 10, 11, 18, 19 } }, { 0, { 12, 13, 20, 21 } }, - { 0, { 14, 15, 22, 23 } }, { 0, { 16, 17, 24, 25 } }, - { 0, { 26, 27, 34, 35 } }, { 0, { 28, 29, 36, 37 } }, - { 0, { 30, 31, 38, 39 } }, { 0, { 32, 33, 40, 41 } }, -}; - -static const RD_RECORD_IDX_NODE rd_record_tree_128x128[] = { - { 0, { 4, 5, 8, 9 } }, { 0, { 6, 7, 10, 11 } }, - { 0, { 12, 13, 16, 17 } }, { 0, { 14, 15, 18, 19 } }, - { 0, { 20, 21, 28, 29 } }, { 0, { 22, 23, 30, 31 } }, - { 0, { 24, 25, 32, 33 } }, { 0, { 26, 27, 34, 35 } }, - { 0, { 36, 37, 44, 45 } }, { 0, { 38, 39, 46, 47 } }, - { 0, { 40, 41, 48, 49 } }, { 0, { 42, 43, 50, 51 } }, - { 0, { 52, 53, 60, 61 } }, { 0, { 54, 55, 62, 63 } }, - { 0, { 56, 57, 64, 65 } }, { 0, { 58, 59, 66, 67 } }, - { 0, { 68, 69, 76, 77 } }, { 0, { 70, 71, 78, 79 } }, - { 0, { 72, 73, 80, 81 } }, { 0, { 74, 75, 82, 83 } }, -}; - -static const RD_RECORD_IDX_NODE rd_record_tree_1_4[] = { - { 0, { 1, -1, 2, -1 } }, - { 0, { 3, 4, -1, -1 } }, - { 0, { 5, 6, -1, -1 } }, -}; - -static const RD_RECORD_IDX_NODE rd_record_tree_4_1[] = { - { 0, { 1, 2, -1, -1 } }, - { 0, { 3, 4, -1, -1 } }, - { 0, { 5, 6, -1, -1 } }, -}; - -static const RD_RECORD_IDX_NODE *rd_record_tree[BLOCK_SIZES_ALL] = { - NULL, // BLOCK_4X4 - NULL, // BLOCK_4X8 - NULL, // BLOCK_8X4 - 
rd_record_tree_8x8, // BLOCK_8X8 - rd_record_tree_8x16, // BLOCK_8X16 - rd_record_tree_16x8, // BLOCK_16X8 - rd_record_tree_16x16, // BLOCK_16X16 - rd_record_tree_1_2, // BLOCK_16X32 - rd_record_tree_2_1, // BLOCK_32X16 - rd_record_tree_sqr, // BLOCK_32X32 - rd_record_tree_1_2, // BLOCK_32X64 - rd_record_tree_2_1, // BLOCK_64X32 - rd_record_tree_sqr, // BLOCK_64X64 - rd_record_tree_64x128, // BLOCK_64X128 - rd_record_tree_128x64, // BLOCK_128X64 - rd_record_tree_128x128, // BLOCK_128X128 - NULL, // BLOCK_4X16 - NULL, // BLOCK_16X4 - rd_record_tree_1_4, // BLOCK_8X32 - rd_record_tree_4_1, // BLOCK_32X8 - rd_record_tree_1_4, // BLOCK_16X64 - rd_record_tree_4_1, // BLOCK_64X16 -}; - -static const int rd_record_tree_size[BLOCK_SIZES_ALL] = { - 0, // BLOCK_4X4 - 0, // BLOCK_4X8 - 0, // BLOCK_8X4 - sizeof(rd_record_tree_8x8) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_8X8 - sizeof(rd_record_tree_8x16) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_8X16 - sizeof(rd_record_tree_16x8) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_16X8 - sizeof(rd_record_tree_16x16) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_16X16 - sizeof(rd_record_tree_1_2) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_16X32 - sizeof(rd_record_tree_2_1) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_32X16 - sizeof(rd_record_tree_sqr) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_32X32 - sizeof(rd_record_tree_1_2) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_32X64 - sizeof(rd_record_tree_2_1) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_64X32 - sizeof(rd_record_tree_sqr) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_64X64 - sizeof(rd_record_tree_64x128) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_64X128 - sizeof(rd_record_tree_128x64) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_128X64 - sizeof(rd_record_tree_128x128) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_128X128 - 0, // BLOCK_4X16 - 0, // BLOCK_16X4 - sizeof(rd_record_tree_1_4) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_8X32 - sizeof(rd_record_tree_4_1) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_32X8 - sizeof(rd_record_tree_1_4) / 
sizeof(RD_RECORD_IDX_NODE), // BLOCK_16X64 - sizeof(rd_record_tree_4_1) / sizeof(RD_RECORD_IDX_NODE), // BLOCK_64X16 -}; - -static INLINE void init_rd_record_tree(TXB_RD_INFO_NODE *tree, - BLOCK_SIZE bsize) { - const RD_RECORD_IDX_NODE *rd_record = rd_record_tree[bsize]; - const int size = rd_record_tree_size[bsize]; - for (int i = 0; i < size; ++i) { - if (rd_record[i].leaf) { - av1_zero(tree[i].children); - } else { - for (int j = 0; j < 4; ++j) { - const int8_t idx = rd_record[i].children[j]; - tree[i].children[j] = idx > 0 ? &tree[idx] : NULL; - } - } - } -} - -// Go through all TX blocks that could be used in TX size search, compute -// residual hash values for them and find matching RD info that stores previous -// RD search results for these TX blocks. The idea is to prevent repeated -// rate/distortion computations that happen because of the combination of -// partition and TX size search. The resulting RD info records are returned in -// the form of a quadtree for easier access in actual TX size search. 
-static int find_tx_size_rd_records(MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, - int mi_col, TXB_RD_INFO_NODE *dst_rd_info) { - TXB_RD_RECORD *rd_records_table[4] = { x->txb_rd_record_8X8, - x->txb_rd_record_16X16, - x->txb_rd_record_32X32, - x->txb_rd_record_64X64 }; - const TX_SIZE max_square_tx_size = max_txsize_lookup[bsize]; - const int bw = block_size_wide[bsize]; - const int bh = block_size_high[bsize]; - - // Hashing is performed only for square TX sizes larger than TX_4X4 - if (max_square_tx_size < TX_8X8) return 0; - const int diff_stride = bw; - const struct macroblock_plane *const p = &x->plane[0]; - const int16_t *diff = &p->src_diff[0]; - init_rd_record_tree(dst_rd_info, bsize); - // Coordinates of the top-left corner of current block within the superblock - // measured in pixels: - const int mi_row_in_sb = (mi_row % MAX_MIB_SIZE) << MI_SIZE_LOG2; - const int mi_col_in_sb = (mi_col % MAX_MIB_SIZE) << MI_SIZE_LOG2; - int cur_rd_info_idx = 0; - int cur_tx_depth = 0; - TX_SIZE cur_tx_size = max_txsize_rect_lookup[bsize]; - while (cur_tx_depth <= MAX_VARTX_DEPTH) { - const int cur_tx_bw = tx_size_wide[cur_tx_size]; - const int cur_tx_bh = tx_size_high[cur_tx_size]; - if (cur_tx_bw < 8 || cur_tx_bh < 8) break; - const TX_SIZE next_tx_size = sub_tx_size_map[cur_tx_size]; - const int tx_size_idx = cur_tx_size - TX_8X8; - for (int row = 0; row < bh; row += cur_tx_bh) { - for (int col = 0; col < bw; col += cur_tx_bw) { - if (cur_tx_bw != cur_tx_bh) { - // Use dummy nodes for all rectangular transforms within the - // TX size search tree. - dst_rd_info[cur_rd_info_idx].rd_info_array = NULL; - } else { - // Get spatial location of this TX block within the superblock - // (measured in cur_tx_bsize units). 
- const int row_in_sb = (mi_row_in_sb + row) / cur_tx_bh; - const int col_in_sb = (mi_col_in_sb + col) / cur_tx_bw; - - int16_t hash_data[MAX_SB_SQUARE]; - int16_t *cur_hash_row = hash_data; - const int16_t *cur_diff_row = diff + row * diff_stride + col; - for (int i = 0; i < cur_tx_bh; i++) { - memcpy(cur_hash_row, cur_diff_row, sizeof(*hash_data) * cur_tx_bw); - cur_hash_row += cur_tx_bw; - cur_diff_row += diff_stride; - } - const int hash = av1_get_crc32c_value(&x->mb_rd_record.crc_calculator, - (uint8_t *)hash_data, - 2 * cur_tx_bw * cur_tx_bh); - // Find corresponding RD info based on the hash value. - const int record_idx = - row_in_sb * (MAX_MIB_SIZE >> (tx_size_idx + 1)) + col_in_sb; - TXB_RD_RECORD *records = &rd_records_table[tx_size_idx][record_idx]; - int idx = find_tx_size_rd_info(records, hash); - dst_rd_info[cur_rd_info_idx].rd_info_array = - &records->tx_rd_info[idx]; - } - ++cur_rd_info_idx; - } - } - cur_tx_size = next_tx_size; - ++cur_tx_depth; - } - return 1; -} - -// origin_threshold * 128 / 100 -static const uint32_t skip_pred_threshold[3][BLOCK_SIZES_ALL] = { - { - 64, 64, 64, 70, 60, 60, 68, 68, 68, 68, 68, - 68, 68, 68, 68, 68, 64, 64, 70, 70, 68, 68, - }, - { - 88, 88, 88, 86, 87, 87, 68, 68, 68, 68, 68, - 68, 68, 68, 68, 68, 88, 88, 86, 86, 68, 68, - }, - { - 90, 93, 93, 90, 93, 93, 74, 74, 74, 74, 74, - 74, 74, 74, 74, 74, 90, 90, 90, 90, 74, 74, - }, -}; - -// lookup table for predict_skip_flag -// int max_tx_size = max_txsize_rect_lookup[bsize]; -// if (tx_size_high[max_tx_size] > 16 || tx_size_wide[max_tx_size] > 16) -// max_tx_size = AOMMIN(max_txsize_lookup[bsize], TX_16X16); -static const TX_SIZE max_predict_sf_tx_size[BLOCK_SIZES_ALL] = { - TX_4X4, TX_4X8, TX_8X4, TX_8X8, TX_8X16, TX_16X8, - TX_16X16, TX_16X16, TX_16X16, TX_16X16, TX_16X16, TX_16X16, - TX_16X16, TX_16X16, TX_16X16, TX_16X16, TX_4X16, TX_16X4, - TX_8X8, TX_8X8, TX_16X16, TX_16X16, -}; - -// Uses simple features on top of DCT coefficients to quickly predict -// 
whether optimal RD decision is to skip encoding the residual. -// The sse value is stored in dist. -static int predict_skip_flag(MACROBLOCK *x, BLOCK_SIZE bsize, int64_t *dist, - int reduced_tx_set) { - const int bw = block_size_wide[bsize]; - const int bh = block_size_high[bsize]; - const MACROBLOCKD *xd = &x->e_mbd; - const int16_t dc_q = av1_dc_quant_QTX(x->qindex, 0, xd->bd); - - *dist = pixel_diff_dist(x, 0, 0, 0, bsize, bsize); - const int64_t mse = *dist / bw / bh; - // Normalized quantizer takes the transform upscaling factor (8 for tx size - // smaller than 32) into account. - const int16_t normalized_dc_q = dc_q >> 3; - const int64_t mse_thresh = (int64_t)normalized_dc_q * normalized_dc_q / 8; - // Predict not to skip when mse is larger than threshold. - if (mse > mse_thresh) return 0; - - const int max_tx_size = max_predict_sf_tx_size[bsize]; - const int tx_h = tx_size_high[max_tx_size]; - const int tx_w = tx_size_wide[max_tx_size]; - DECLARE_ALIGNED(32, tran_low_t, coefs[32 * 32]); - TxfmParam param; - param.tx_type = DCT_DCT; - param.tx_size = max_tx_size; - param.bd = xd->bd; - param.is_hbd = get_bitdepth_data_path_index(xd); - param.lossless = 0; - param.tx_set_type = av1_get_ext_tx_set_type( - param.tx_size, is_inter_block(xd->mi[0]), reduced_tx_set); - const int bd_idx = (xd->bd == 8) ? 0 : ((xd->bd == 10) ? 
1 : 2); - const uint32_t max_qcoef_thresh = skip_pred_threshold[bd_idx][bsize]; - const int16_t *src_diff = x->plane[0].src_diff; - const int n_coeff = tx_w * tx_h; - const int16_t ac_q = av1_ac_quant_QTX(x->qindex, 0, xd->bd); - const uint32_t dc_thresh = max_qcoef_thresh * dc_q; - const uint32_t ac_thresh = max_qcoef_thresh * ac_q; - for (int row = 0; row < bh; row += tx_h) { - for (int col = 0; col < bw; col += tx_w) { - av1_fwd_txfm(src_diff + col, coefs, bw, &param); - // Operating on TX domain, not pixels; we want the QTX quantizers - const uint32_t dc_coef = (((uint32_t)abs(coefs[0])) << 7); - if (dc_coef >= dc_thresh) return 0; - for (int i = 1; i < n_coeff; ++i) { - const uint32_t ac_coef = (((uint32_t)abs(coefs[i])) << 7); - if (ac_coef >= ac_thresh) return 0; - } - } - src_diff += tx_h * bw; - } - return 1; -} - -// Used to set proper context for early termination with skip = 1. -static void set_skip_flag(MACROBLOCK *x, RD_STATS *rd_stats, int bsize, - int64_t dist) { - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - const int n4 = bsize_to_num_blk(bsize); - const TX_SIZE tx_size = max_txsize_rect_lookup[bsize]; - memset(mbmi->txk_type, DCT_DCT, sizeof(mbmi->txk_type[0]) * TXK_TYPE_BUF_LEN); - memset(mbmi->inter_tx_size, tx_size, sizeof(mbmi->inter_tx_size)); - mbmi->tx_size = tx_size; - for (int i = 0; i < n4; ++i) set_blk_skip(x, 0, i, 1); - rd_stats->skip = 1; - rd_stats->rate = 0; - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) - dist = ROUND_POWER_OF_TWO(dist, (xd->bd - 8) * 2); - rd_stats->dist = rd_stats->sse = (dist << 4); -} - -static void select_tx_type_yrd(const AV1_COMP *cpi, MACROBLOCK *x, - RD_STATS *rd_stats, BLOCK_SIZE bsize, int mi_row, - int mi_col, int64_t ref_best_rd) { - const AV1_COMMON *cm = &cpi->common; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - int64_t rd = INT64_MAX; - int64_t best_rd = INT64_MAX; - const int is_inter = is_inter_block(mbmi); - const int n4 =
bsize_to_num_blk(bsize); - // Get the tx_size 1 level down - const TX_SIZE min_tx_size = sub_tx_size_map[max_txsize_rect_lookup[bsize]]; - const TxSetType tx_set_type = - av1_get_ext_tx_set_type(min_tx_size, is_inter, cm->reduced_tx_set_used); - const int within_border = - mi_row >= xd->tile.mi_row_start && - (mi_row + mi_size_high[bsize] < xd->tile.mi_row_end) && - mi_col >= xd->tile.mi_col_start && - (mi_col + mi_size_wide[bsize] < xd->tile.mi_col_end); - - av1_invalid_rd_stats(rd_stats); - - if (cpi->sf.model_based_prune_tx_search_level && ref_best_rd != INT64_MAX) { - int model_rate; - int64_t model_dist; - int model_skip; - model_rd_sb_fn[MODELRD_TYPE_TX_SEARCH_PRUNE]( - cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &model_rate, &model_dist, - &model_skip, NULL, NULL, NULL, NULL); - const int64_t model_rd = RDCOST(x->rdmult, model_rate, model_dist); - // If the modeled rd is a lot worse than the best so far, breakout. - // TODO(debargha, urvang): Improve the model and make the check below - // tighter. - assert(cpi->sf.model_based_prune_tx_search_level >= 0 && - cpi->sf.model_based_prune_tx_search_level <= 2); - static const int prune_factor_by8[] = { 2 + MODELRD_TYPE_TX_SEARCH_PRUNE, - 4 + MODELRD_TYPE_TX_SEARCH_PRUNE }; - if (!model_skip && - ((model_rd * - prune_factor_by8[cpi->sf.model_based_prune_tx_search_level - 1]) >> - 3) > ref_best_rd) - return; - } - - const uint32_t hash = get_block_residue_hash(x, bsize); - MB_RD_RECORD *mb_rd_record = &x->mb_rd_record; - - if (ref_best_rd != INT64_MAX && within_border && cpi->sf.use_mb_rd_hash) { - for (int i = 0; i < mb_rd_record->num; ++i) { - const int index = (mb_rd_record->index_start + i) % RD_RECORD_BUFFER_LEN; - // If there is a match in the tx_rd_record, fetch the RD decision and - // terminate early. 
- if (mb_rd_record->tx_rd_info[index].hash_value == hash) { - MB_RD_INFO *tx_rd_info = &mb_rd_record->tx_rd_info[index]; - fetch_tx_rd_info(n4, tx_rd_info, rd_stats, x); - return; - } - } - } - - // If we predict that skip is the optimal RD decision - set the respective - // context and terminate early. - int64_t dist; - if (is_inter && cpi->sf.tx_type_search.use_skip_flag_prediction && - predict_skip_flag(x, bsize, &dist, cm->reduced_tx_set_used)) { - set_skip_flag(x, rd_stats, bsize, dist); - // Save the RD search results into tx_rd_record. - if (within_border) save_tx_rd_info(n4, hash, x, rd_stats, mb_rd_record); - return; - } - - // Precompute residual hashes and find existing or add new RD records to - // store and reuse rate and distortion values to speed up TX size search. - TXB_RD_INFO_NODE matched_rd_info[4 + 16 + 64]; - int found_rd_info = 0; - if (ref_best_rd != INT64_MAX && within_border && cpi->sf.use_inter_txb_hash) { - found_rd_info = - find_tx_size_rd_records(x, bsize, mi_row, mi_col, matched_rd_info); - } - - prune_tx(cpi, bsize, x, xd, tx_set_type); - - int found = 0; - - RD_STATS this_rd_stats; - av1_init_rd_stats(&this_rd_stats); - - rd = select_tx_size_fix_type(cpi, x, &this_rd_stats, bsize, ref_best_rd, - found_rd_info ? matched_rd_info : NULL); - assert(IMPLIES(this_rd_stats.skip && !this_rd_stats.invalid_rate, - this_rd_stats.rate == 0)); - - ref_best_rd = AOMMIN(rd, ref_best_rd); - if (rd < best_rd) { - *rd_stats = this_rd_stats; - found = 1; - } - - // Reset the pruning flags. - av1_zero(x->tx_search_prune); - x->tx_split_prune_flag = 0; - - // We should always find at least one candidate unless ref_best_rd is less - // than INT64_MAX (in which case, all the calls to select_tx_size_fix_type - // might have failed to find something better) - assert(IMPLIES(!found, ref_best_rd != INT64_MAX)); - if (!found) return; - - // Save the RD search results into tx_rd_record. 
- if (within_border && cpi->sf.use_mb_rd_hash) - save_tx_rd_info(n4, hash, x, rd_stats, mb_rd_record); -} - -static void tx_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x, int blk_row, - int blk_col, int plane, int block, TX_SIZE tx_size, - BLOCK_SIZE plane_bsize, ENTROPY_CONTEXT *above_ctx, - ENTROPY_CONTEXT *left_ctx, RD_STATS *rd_stats, - FAST_TX_SEARCH_MODE ftxs_mode) { - assert(plane > 0); - assert(tx_size < TX_SIZES_ALL); - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - const int max_blocks_high = max_block_high(xd, plane_bsize, plane); - const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane); - if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return; - - ENTROPY_CONTEXT *ta = above_ctx + blk_col; - ENTROPY_CONTEXT *tl = left_ctx + blk_row; - TXB_CTX txb_ctx; - get_txb_ctx(plane_bsize, tx_size, plane, ta, tl, &txb_ctx); - const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size); - const int zero_blk_rate = x->coeff_costs[txs_ctx][PLANE_TYPE_UV] - .txb_skip_cost[txb_ctx.txb_skip_ctx][1]; - tx_block_rd_b(cpi, x, tx_size, blk_row, blk_col, plane, block, plane_bsize, - &txb_ctx, rd_stats, ftxs_mode, INT64_MAX, NULL); - - const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0]; - const int blk_idx = blk_row * mi_width + blk_col; - - av1_set_txb_context(x, plane, block, tx_size, ta, tl); - if ((RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist) >= - RDCOST(x->rdmult, zero_blk_rate, rd_stats->sse) || - rd_stats->skip == 1) && - !xd->lossless[mbmi->segment_id]) { - rd_stats->rate = zero_blk_rate; - rd_stats->dist = rd_stats->sse; - } - - // Set chroma blk_skip to 0 - set_blk_skip(x, plane, blk_idx, 0); -} - -// Return value 0: early termination triggered, no valid rd cost available; -// 1: rd cost values are valid. 
-static int inter_block_uvrd(const AV1_COMP *cpi, MACROBLOCK *x, - RD_STATS *rd_stats, BLOCK_SIZE bsize, - int64_t non_skip_ref_best_rd, - int64_t skip_ref_best_rd, - FAST_TX_SEARCH_MODE ftxs_mode) { - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - int plane; - int is_cost_valid = 1; - int64_t this_rd = 0; - int64_t skip_rd = 0; - - if ((non_skip_ref_best_rd < 0) && (skip_ref_best_rd < 0)) is_cost_valid = 0; - - av1_init_rd_stats(rd_stats); - - if (x->skip_chroma_rd) { - if (!is_cost_valid) av1_invalid_rd_stats(rd_stats); - - return is_cost_valid; - } - - const BLOCK_SIZE bsizec = scale_chroma_bsize( - bsize, xd->plane[1].subsampling_x, xd->plane[1].subsampling_y); - - if (is_inter_block(mbmi) && is_cost_valid) { - for (plane = 1; plane < MAX_MB_PLANE; ++plane) - av1_subtract_plane(x, bsizec, plane); - } - - if (is_cost_valid) { - for (plane = 1; plane < MAX_MB_PLANE; ++plane) { - const struct macroblockd_plane *const pd = &xd->plane[plane]; - const BLOCK_SIZE plane_bsize = - get_plane_block_size(bsizec, pd->subsampling_x, pd->subsampling_y); - const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0]; - const int mi_height = - block_size_high[plane_bsize] >> tx_size_high_log2[0]; - const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, plane); - const int bh = tx_size_high_unit[max_tx_size]; - const int bw = tx_size_wide_unit[max_tx_size]; - int idx, idy; - int block = 0; - const int step = bh * bw; - ENTROPY_CONTEXT ta[MAX_MIB_SIZE]; - ENTROPY_CONTEXT tl[MAX_MIB_SIZE]; - av1_get_entropy_contexts(bsizec, pd, ta, tl); - - for (idy = 0; idy < mi_height; idy += bh) { - for (idx = 0; idx < mi_width; idx += bw) { - RD_STATS pn_rd_stats; - av1_init_rd_stats(&pn_rd_stats); - tx_block_uvrd(cpi, x, idy, idx, plane, block, max_tx_size, - plane_bsize, ta, tl, &pn_rd_stats, ftxs_mode); - if (pn_rd_stats.rate == INT_MAX) { - av1_invalid_rd_stats(rd_stats); - return 0; - } - av1_merge_rd_stats(rd_stats, &pn_rd_stats); - 
this_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); - skip_rd = RDCOST(x->rdmult, 0, rd_stats->sse); - if ((this_rd > non_skip_ref_best_rd) && - (skip_rd > skip_ref_best_rd)) { - av1_invalid_rd_stats(rd_stats); - return 0; - } - block += step; - } - } - } - } else { - // reset cost value - av1_invalid_rd_stats(rd_stats); - } - - return is_cost_valid; -} - -static void rd_pick_palette_intra_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x, - int dc_mode_cost, - uint8_t *best_palette_color_map, - MB_MODE_INFO *const best_mbmi, - int64_t *best_rd, int *rate, - int *rate_tokenonly, int64_t *distortion, - int *skippable) { - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - assert(!is_inter_block(mbmi)); - assert( - av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type)); - PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; - const BLOCK_SIZE bsize = mbmi->sb_type; - const SequenceHeader *const seq_params = &cpi->common.seq_params; - int this_rate; - int64_t this_rd; - int colors_u, colors_v, colors; - const int src_stride = x->plane[1].src.stride; - const uint8_t *const src_u = x->plane[1].src.buf; - const uint8_t *const src_v = x->plane[2].src.buf; - uint8_t *const color_map = xd->plane[1].color_index_map; - RD_STATS tokenonly_rd_stats; - int plane_block_width, plane_block_height, rows, cols; - av1_get_block_dimensions(bsize, 1, xd, &plane_block_width, - &plane_block_height, &rows, &cols); - - mbmi->uv_mode = UV_DC_PRED; - - int count_buf[1 << 12]; // Maximum (1 << 12) color levels. 
- if (seq_params->use_highbitdepth) { - colors_u = av1_count_colors_highbd(src_u, src_stride, rows, cols, - seq_params->bit_depth, count_buf); - colors_v = av1_count_colors_highbd(src_v, src_stride, rows, cols, - seq_params->bit_depth, count_buf); - } else { - colors_u = av1_count_colors(src_u, src_stride, rows, cols, count_buf); - colors_v = av1_count_colors(src_v, src_stride, rows, cols, count_buf); - } - - uint16_t color_cache[2 * PALETTE_MAX_SIZE]; - const int n_cache = av1_get_palette_cache(xd, 1, color_cache); - - colors = colors_u > colors_v ? colors_u : colors_v; - if (colors > 1 && colors <= 64) { - int r, c, n, i, j; - const int max_itr = 50; - int lb_u, ub_u, val_u; - int lb_v, ub_v, val_v; - int *const data = x->palette_buffer->kmeans_data_buf; - int centroids[2 * PALETTE_MAX_SIZE]; - - uint16_t *src_u16 = CONVERT_TO_SHORTPTR(src_u); - uint16_t *src_v16 = CONVERT_TO_SHORTPTR(src_v); - if (seq_params->use_highbitdepth) { - lb_u = src_u16[0]; - ub_u = src_u16[0]; - lb_v = src_v16[0]; - ub_v = src_v16[0]; - } else { - lb_u = src_u[0]; - ub_u = src_u[0]; - lb_v = src_v[0]; - ub_v = src_v[0]; - } - - for (r = 0; r < rows; ++r) { - for (c = 0; c < cols; ++c) { - if (seq_params->use_highbitdepth) { - val_u = src_u16[r * src_stride + c]; - val_v = src_v16[r * src_stride + c]; - data[(r * cols + c) * 2] = val_u; - data[(r * cols + c) * 2 + 1] = val_v; - } else { - val_u = src_u[r * src_stride + c]; - val_v = src_v[r * src_stride + c]; - data[(r * cols + c) * 2] = val_u; - data[(r * cols + c) * 2 + 1] = val_v; - } - if (val_u < lb_u) - lb_u = val_u; - else if (val_u > ub_u) - ub_u = val_u; - if (val_v < lb_v) - lb_v = val_v; - else if (val_v > ub_v) - ub_v = val_v; - } - } - - for (n = colors > PALETTE_MAX_SIZE ? 
PALETTE_MAX_SIZE : colors; n >= 2; - --n) { - for (i = 0; i < n; ++i) { - centroids[i * 2] = lb_u + (2 * i + 1) * (ub_u - lb_u) / n / 2; - centroids[i * 2 + 1] = lb_v + (2 * i + 1) * (ub_v - lb_v) / n / 2; - } - av1_k_means(data, centroids, color_map, rows * cols, n, 2, max_itr); - optimize_palette_colors(color_cache, n_cache, n, 2, centroids); - // Sort the U channel colors in ascending order. - for (i = 0; i < 2 * (n - 1); i += 2) { - int min_idx = i; - int min_val = centroids[i]; - for (j = i + 2; j < 2 * n; j += 2) - if (centroids[j] < min_val) min_val = centroids[j], min_idx = j; - if (min_idx != i) { - int temp_u = centroids[i], temp_v = centroids[i + 1]; - centroids[i] = centroids[min_idx]; - centroids[i + 1] = centroids[min_idx + 1]; - centroids[min_idx] = temp_u, centroids[min_idx + 1] = temp_v; - } - } - av1_calc_indices(data, centroids, color_map, rows * cols, n, 2); - extend_palette_color_map(color_map, cols, rows, plane_block_width, - plane_block_height); - pmi->palette_size[1] = n; - for (i = 1; i < 3; ++i) { - for (j = 0; j < n; ++j) { - if (seq_params->use_highbitdepth) - pmi->palette_colors[i * PALETTE_MAX_SIZE + j] = clip_pixel_highbd( - (int)centroids[j * 2 + i - 1], seq_params->bit_depth); - else - pmi->palette_colors[i * PALETTE_MAX_SIZE + j] = - clip_pixel((int)centroids[j * 2 + i - 1]); - } - } - - super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, *best_rd); - if (tokenonly_rd_stats.rate == INT_MAX) continue; - this_rate = tokenonly_rd_stats.rate + - intra_mode_info_cost_uv(cpi, x, mbmi, bsize, dc_mode_cost); - this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist); - if (this_rd < *best_rd) { - *best_rd = this_rd; - *best_mbmi = *mbmi; - memcpy(best_palette_color_map, color_map, - plane_block_width * plane_block_height * - sizeof(best_palette_color_map[0])); - *rate = this_rate; - *distortion = tokenonly_rd_stats.dist; - *rate_tokenonly = tokenonly_rd_stats.rate; - *skippable = tokenonly_rd_stats.skip; - } - } - } - if 
(best_mbmi->palette_mode_info.palette_size[1] > 0) { - memcpy(color_map, best_palette_color_map, - plane_block_width * plane_block_height * - sizeof(best_palette_color_map[0])); - } -} - -// Run RD calculation with given chroma intra prediction angle., and return -// the RD cost. Update the best mode info. if the RD cost is the best so far. -static int64_t pick_intra_angle_routine_sbuv( - const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, - int rate_overhead, int64_t best_rd_in, int *rate, RD_STATS *rd_stats, - int *best_angle_delta, int64_t *best_rd) { - MB_MODE_INFO *mbmi = x->e_mbd.mi[0]; - assert(!is_inter_block(mbmi)); - int this_rate; - int64_t this_rd; - RD_STATS tokenonly_rd_stats; - - if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd_in)) - return INT64_MAX; - this_rate = tokenonly_rd_stats.rate + - intra_mode_info_cost_uv(cpi, x, mbmi, bsize, rate_overhead); - this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist); - if (this_rd < *best_rd) { - *best_rd = this_rd; - *best_angle_delta = mbmi->angle_delta[PLANE_TYPE_UV]; - *rate = this_rate; - rd_stats->rate = tokenonly_rd_stats.rate; - rd_stats->dist = tokenonly_rd_stats.dist; - rd_stats->skip = tokenonly_rd_stats.skip; - } - return this_rd; -} - -// With given chroma directional intra prediction mode, pick the best angle -// delta. Return true if a RD cost that is smaller than the input one is found. 
-static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x, - BLOCK_SIZE bsize, int rate_overhead, - int64_t best_rd, int *rate, - RD_STATS *rd_stats) { - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *mbmi = xd->mi[0]; - assert(!is_inter_block(mbmi)); - int i, angle_delta, best_angle_delta = 0; - int64_t this_rd, best_rd_in, rd_cost[2 * (MAX_ANGLE_DELTA + 2)]; - - rd_stats->rate = INT_MAX; - rd_stats->skip = 0; - rd_stats->dist = INT64_MAX; - for (i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX; - - for (angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) { - for (i = 0; i < 2; ++i) { - best_rd_in = (best_rd == INT64_MAX) - ? INT64_MAX - : (best_rd + (best_rd >> ((angle_delta == 0) ? 3 : 5))); - mbmi->angle_delta[PLANE_TYPE_UV] = (1 - 2 * i) * angle_delta; - this_rd = pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead, - best_rd_in, rate, rd_stats, - &best_angle_delta, &best_rd); - rd_cost[2 * angle_delta + i] = this_rd; - if (angle_delta == 0) { - if (this_rd == INT64_MAX) return 0; - rd_cost[1] = this_rd; - break; - } - } - } - - assert(best_rd != INT64_MAX); - for (angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) { - int64_t rd_thresh; - for (i = 0; i < 2; ++i) { - int skip_search = 0; - rd_thresh = best_rd + (best_rd >> 5); - if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh && - rd_cost[2 * (angle_delta - 1) + i] > rd_thresh) - skip_search = 1; - if (!skip_search) { - mbmi->angle_delta[PLANE_TYPE_UV] = (1 - 2 * i) * angle_delta; - pick_intra_angle_routine_sbuv(cpi, x, bsize, rate_overhead, best_rd, - rate, rd_stats, &best_angle_delta, - &best_rd); - } - } - } - - mbmi->angle_delta[PLANE_TYPE_UV] = best_angle_delta; - return rd_stats->rate != INT_MAX; -} - -#define PLANE_SIGN_TO_JOINT_SIGN(plane, a, b) \ - (plane == CFL_PRED_U ? 
a * CFL_SIGNS + b - 1 : b * CFL_SIGNS + a - 1) -static int cfl_rd_pick_alpha(MACROBLOCK *const x, const AV1_COMP *const cpi, - TX_SIZE tx_size, int64_t best_rd) { - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - - const BLOCK_SIZE bsize = mbmi->sb_type; -#if CONFIG_DEBUG - assert(is_cfl_allowed(xd)); - const int ssx = xd->plane[AOM_PLANE_U].subsampling_x; - const int ssy = xd->plane[AOM_PLANE_U].subsampling_y; - const BLOCK_SIZE plane_bsize = get_plane_block_size(mbmi->sb_type, ssx, ssy); - (void)plane_bsize; - assert(plane_bsize < BLOCK_SIZES_ALL); - if (!xd->lossless[mbmi->segment_id]) { - assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]); - assert(block_size_high[plane_bsize] == tx_size_high[tx_size]); - } -#endif // CONFIG_DEBUG - - xd->cfl.use_dc_pred_cache = 1; - const int64_t mode_rd = - RDCOST(x->rdmult, - x->intra_uv_mode_cost[CFL_ALLOWED][mbmi->mode][UV_CFL_PRED], 0); - int64_t best_rd_uv[CFL_JOINT_SIGNS][CFL_PRED_PLANES]; - int best_c[CFL_JOINT_SIGNS][CFL_PRED_PLANES]; -#if CONFIG_DEBUG - int best_rate_uv[CFL_JOINT_SIGNS][CFL_PRED_PLANES]; -#endif // CONFIG_DEBUG - - for (int plane = 0; plane < CFL_PRED_PLANES; plane++) { - RD_STATS rd_stats; - av1_init_rd_stats(&rd_stats); - for (int joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) { - best_rd_uv[joint_sign][plane] = INT64_MAX; - best_c[joint_sign][plane] = 0; - } - // Collect RD stats for an alpha value of zero in this plane. - // Skip i == CFL_SIGN_ZERO as (0, 0) is invalid. 
- for (int i = CFL_SIGN_NEG; i < CFL_SIGNS; i++) { - const int joint_sign = PLANE_SIGN_TO_JOINT_SIGN(plane, CFL_SIGN_ZERO, i); - if (i == CFL_SIGN_NEG) { - mbmi->cfl_alpha_idx = 0; - mbmi->cfl_alpha_signs = joint_sign; - txfm_rd_in_plane(x, cpi, &rd_stats, best_rd, plane + 1, bsize, tx_size, - cpi->sf.use_fast_coef_costing, FTXS_NONE); - if (rd_stats.rate == INT_MAX) break; - } - const int alpha_rate = x->cfl_cost[joint_sign][plane][0]; - best_rd_uv[joint_sign][plane] = - RDCOST(x->rdmult, rd_stats.rate + alpha_rate, rd_stats.dist); -#if CONFIG_DEBUG - best_rate_uv[joint_sign][plane] = rd_stats.rate; -#endif // CONFIG_DEBUG - } - } - - int best_joint_sign = -1; - - for (int plane = 0; plane < CFL_PRED_PLANES; plane++) { - for (int pn_sign = CFL_SIGN_NEG; pn_sign < CFL_SIGNS; pn_sign++) { - int progress = 0; - for (int c = 0; c < CFL_ALPHABET_SIZE; c++) { - int flag = 0; - RD_STATS rd_stats; - if (c > 2 && progress < c) break; - av1_init_rd_stats(&rd_stats); - for (int i = 0; i < CFL_SIGNS; i++) { - const int joint_sign = PLANE_SIGN_TO_JOINT_SIGN(plane, pn_sign, i); - if (i == 0) { - mbmi->cfl_alpha_idx = (c << CFL_ALPHABET_SIZE_LOG2) + c; - mbmi->cfl_alpha_signs = joint_sign; - txfm_rd_in_plane(x, cpi, &rd_stats, best_rd, plane + 1, bsize, - tx_size, cpi->sf.use_fast_coef_costing, FTXS_NONE); - if (rd_stats.rate == INT_MAX) break; - } - const int alpha_rate = x->cfl_cost[joint_sign][plane][c]; - int64_t this_rd = - RDCOST(x->rdmult, rd_stats.rate + alpha_rate, rd_stats.dist); - if (this_rd >= best_rd_uv[joint_sign][plane]) continue; - best_rd_uv[joint_sign][plane] = this_rd; - best_c[joint_sign][plane] = c; -#if CONFIG_DEBUG - best_rate_uv[joint_sign][plane] = rd_stats.rate; -#endif // CONFIG_DEBUG - flag = 2; - if (best_rd_uv[joint_sign][!plane] == INT64_MAX) continue; - this_rd += mode_rd + best_rd_uv[joint_sign][!plane]; - if (this_rd >= best_rd) continue; - best_rd = this_rd; - best_joint_sign = joint_sign; - } - progress += flag; - } - } - } - - int 
best_rate_overhead = INT_MAX; - int ind = 0; - if (best_joint_sign >= 0) { - const int u = best_c[best_joint_sign][CFL_PRED_U]; - const int v = best_c[best_joint_sign][CFL_PRED_V]; - ind = (u << CFL_ALPHABET_SIZE_LOG2) + v; - best_rate_overhead = x->cfl_cost[best_joint_sign][CFL_PRED_U][u] + - x->cfl_cost[best_joint_sign][CFL_PRED_V][v]; -#if CONFIG_DEBUG - xd->cfl.rate = x->intra_uv_mode_cost[CFL_ALLOWED][mbmi->mode][UV_CFL_PRED] + - best_rate_overhead + - best_rate_uv[best_joint_sign][CFL_PRED_U] + - best_rate_uv[best_joint_sign][CFL_PRED_V]; -#endif // CONFIG_DEBUG - } else { - best_joint_sign = 0; - } - - mbmi->cfl_alpha_idx = ind; - mbmi->cfl_alpha_signs = best_joint_sign; - xd->cfl.use_dc_pred_cache = 0; - xd->cfl.dc_pred_is_cached[0] = 0; - xd->cfl.dc_pred_is_cached[1] = 0; - return best_rate_overhead; -} - -static void init_sbuv_mode(MB_MODE_INFO *const mbmi) { - mbmi->uv_mode = UV_DC_PRED; - mbmi->palette_mode_info.palette_size[1] = 0; -} - -static int64_t rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x, - int *rate, int *rate_tokenonly, - int64_t *distortion, int *skippable, - BLOCK_SIZE bsize, TX_SIZE max_tx_size) { - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = xd->mi[0]; - assert(!is_inter_block(mbmi)); - MB_MODE_INFO best_mbmi = *mbmi; - int64_t best_rd = INT64_MAX, this_rd; - - for (int mode_idx = 0; mode_idx < UV_INTRA_MODES; ++mode_idx) { - int this_rate; - RD_STATS tokenonly_rd_stats; - UV_PREDICTION_MODE mode = uv_rd_search_mode_order[mode_idx]; - const int is_directional_mode = av1_is_directional_mode(get_uv_mode(mode)); - if (!(cpi->sf.intra_uv_mode_mask[txsize_sqr_up_map[max_tx_size]] & - (1 << mode))) - continue; - - mbmi->uv_mode = mode; - int cfl_alpha_rate = 0; - if (mode == UV_CFL_PRED) { - if (!is_cfl_allowed(xd)) continue; - assert(!is_directional_mode); - const TX_SIZE uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd); - cfl_alpha_rate = cfl_rd_pick_alpha(x, cpi, uv_tx_size, best_rd); - if (cfl_alpha_rate == 
INT_MAX) continue; - } - mbmi->angle_delta[PLANE_TYPE_UV] = 0; - if (is_directional_mode && av1_use_angle_delta(mbmi->sb_type)) { - const int rate_overhead = - x->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][mode]; - if (!rd_pick_intra_angle_sbuv(cpi, x, bsize, rate_overhead, best_rd, - &this_rate, &tokenonly_rd_stats)) - continue; - } else { - if (!super_block_uvrd(cpi, x, &tokenonly_rd_stats, bsize, best_rd)) { - continue; - } - } - const int mode_cost = - x->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][mode] + - cfl_alpha_rate; - this_rate = tokenonly_rd_stats.rate + - intra_mode_info_cost_uv(cpi, x, mbmi, bsize, mode_cost); - if (mode == UV_CFL_PRED) { - assert(is_cfl_allowed(xd)); -#if CONFIG_DEBUG - if (!xd->lossless[mbmi->segment_id]) - assert(xd->cfl.rate == tokenonly_rd_stats.rate + mode_cost); -#endif // CONFIG_DEBUG - } - this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist); - - if (this_rd < best_rd) { - best_mbmi = *mbmi; - best_rd = this_rd; - *rate = this_rate; - *rate_tokenonly = tokenonly_rd_stats.rate; - *distortion = tokenonly_rd_stats.dist; - *skippable = tokenonly_rd_stats.skip; - } - } - - const int try_palette = - av1_allow_palette(cpi->common.allow_screen_content_tools, mbmi->sb_type); - if (try_palette) { - uint8_t *best_palette_color_map = x->palette_buffer->best_palette_color_map; - rd_pick_palette_intra_sbuv( - cpi, x, - x->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][UV_DC_PRED], - best_palette_color_map, &best_mbmi, &best_rd, rate, rate_tokenonly, - distortion, skippable); - } - - *mbmi = best_mbmi; - // Make sure we actually chose a mode - assert(best_rd < INT64_MAX); - return best_rd; -} - -static void choose_intra_uv_mode(const AV1_COMP *const cpi, MACROBLOCK *const x, - BLOCK_SIZE bsize, TX_SIZE max_tx_size, - int *rate_uv, int *rate_uv_tokenonly, - int64_t *dist_uv, int *skip_uv, - UV_PREDICTION_MODE *mode_uv) { - const AV1_COMMON *const cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO 
*mbmi = xd->mi[0]; - const int mi_row = -xd->mb_to_top_edge >> (3 + MI_SIZE_LOG2); - const int mi_col = -xd->mb_to_left_edge >> (3 + MI_SIZE_LOG2); - // Use an estimated rd for uv_intra based on DC_PRED if the - // appropriate speed flag is set. - init_sbuv_mode(mbmi); - if (x->skip_chroma_rd) { - *rate_uv = 0; - *rate_uv_tokenonly = 0; - *dist_uv = 0; - *skip_uv = 1; - *mode_uv = UV_DC_PRED; - return; - } - xd->cfl.is_chroma_reference = - is_chroma_reference(mi_row, mi_col, bsize, cm->seq_params.subsampling_x, - cm->seq_params.subsampling_y); - bsize = scale_chroma_bsize(bsize, xd->plane[AOM_PLANE_U].subsampling_x, - xd->plane[AOM_PLANE_U].subsampling_y); - // Only store reconstructed luma when there's chroma RDO. When there's no - // chroma RDO, the reconstructed luma will be stored in encode_superblock(). - xd->cfl.store_y = store_cfl_required_rdo(cm, x); - if (xd->cfl.store_y) { - // Restore reconstructed luma values. - av1_encode_intra_block_plane(cpi, x, mbmi->sb_type, AOM_PLANE_Y, - cpi->optimize_seg_arr[mbmi->segment_id], - mi_row, mi_col); - xd->cfl.store_y = 0; - } - rd_pick_intra_sbuv_mode(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv, - bsize, max_tx_size); - *mode_uv = mbmi->uv_mode; -} - -static int cost_mv_ref(const MACROBLOCK *const x, PREDICTION_MODE mode, - int16_t mode_context) { - if (is_inter_compound_mode(mode)) { - return x - ->inter_compound_mode_cost[mode_context][INTER_COMPOUND_OFFSET(mode)]; - } - - int mode_cost = 0; - int16_t mode_ctx = mode_context & NEWMV_CTX_MASK; - - assert(is_inter_mode(mode)); - - if (mode == NEWMV) { - mode_cost = x->newmv_mode_cost[mode_ctx][0]; - return mode_cost; - } else { - mode_cost = x->newmv_mode_cost[mode_ctx][1]; - mode_ctx = (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK; - - if (mode == GLOBALMV) { - mode_cost += x->zeromv_mode_cost[mode_ctx][0]; - return mode_cost; - } else { - mode_cost += x->zeromv_mode_cost[mode_ctx][1]; - mode_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK; - 
mode_cost += x->refmv_mode_cost[mode_ctx][mode != NEARESTMV]; - return mode_cost; - } - } -} - -static int get_interinter_compound_mask_rate(const MACROBLOCK *const x, - const MB_MODE_INFO *const mbmi) { - switch (mbmi->interinter_comp.type) { - case COMPOUND_AVERAGE: return 0; - case COMPOUND_WEDGE: - return get_interinter_wedge_bits(mbmi->sb_type) > 0 - ? av1_cost_literal(1) + - x->wedge_idx_cost[mbmi->sb_type] - [mbmi->interinter_comp.wedge_index] - : 0; - case COMPOUND_DIFFWTD: return av1_cost_literal(1); - default: assert(0); return 0; - } -} - -typedef struct { - int eobs; - int brate; - int byrate; - int64_t bdist; - int64_t bsse; - int64_t brdcost; - int_mv mvs[2]; - int_mv pred_mv[2]; - int_mv ref_mv[2]; - - ENTROPY_CONTEXT ta[2]; - ENTROPY_CONTEXT tl[2]; -} SEG_RDSTAT; - -typedef struct { - int_mv *ref_mv[2]; - int_mv mvp; - - int64_t segment_rd; - int r; - int64_t d; - int64_t sse; - int segment_yrate; - PREDICTION_MODE modes[4]; - SEG_RDSTAT rdstat[4][INTER_MODES + INTER_COMPOUND_MODES]; - int mvthresh; -} BEST_SEG_INFO; - -static INLINE int mv_check_bounds(const MvLimits *mv_limits, const MV *mv) { - return (mv->row >> 3) < mv_limits->row_min || - (mv->row >> 3) > mv_limits->row_max || - (mv->col >> 3) < mv_limits->col_min || - (mv->col >> 3) > mv_limits->col_max; -} - -static INLINE PREDICTION_MODE get_single_mode(PREDICTION_MODE this_mode, - int ref_idx, int is_comp_pred) { - PREDICTION_MODE single_mode; - if (is_comp_pred) { - single_mode = - ref_idx ? 
compound_ref1_mode(this_mode) : compound_ref0_mode(this_mode); - } else { - single_mode = this_mode; - } - return single_mode; -} - -static void joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, - BLOCK_SIZE bsize, int_mv *cur_mv, int mi_row, - int mi_col, int_mv *ref_mv_sub8x8[2], - const uint8_t *mask, int mask_stride, - int *rate_mv, const int block) { - const AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - const int pw = block_size_wide[bsize]; - const int ph = block_size_high[bsize]; - const int plane = 0; - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = xd->mi[0]; - // This function should only ever be called for compound modes - assert(has_second_ref(mbmi)); - const int_mv init_mv[2] = { cur_mv[0], cur_mv[1] }; - const int refs[2] = { mbmi->ref_frame[0], mbmi->ref_frame[1] }; - int_mv ref_mv[2]; - int ite, ref; - // ic and ir are the 4x4 coordinates of the sub8x8 at index "block" - const int ic = block & 1; - const int ir = (block - ic) >> 1; - struct macroblockd_plane *const pd = &xd->plane[0]; - const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic; - const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir; - - ConvolveParams conv_params = get_conv_params(0, plane, xd->bd); - conv_params.use_jnt_comp_avg = 0; - WarpTypesAllowed warp_types[2]; - for (ref = 0; ref < 2; ++ref) { - const WarpedMotionParams *const wm = - &xd->global_motion[xd->mi[0]->ref_frame[ref]]; - const int is_global = is_global_mv_block(xd->mi[0], wm->wmtype); - warp_types[ref].global_warp_allowed = is_global; - warp_types[ref].local_warp_allowed = mbmi->motion_mode == WARPED_CAUSAL; - } - - // Do joint motion search in compound mode to get more accurate mv. 
- struct buf_2d backup_yv12[2][MAX_MB_PLANE]; - int last_besterr[2] = { INT_MAX, INT_MAX }; - const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = { - av1_get_scaled_ref_frame(cpi, refs[0]), - av1_get_scaled_ref_frame(cpi, refs[1]) - }; - - // Prediction buffer from second frame. - DECLARE_ALIGNED(16, uint8_t, second_pred16[MAX_SB_SQUARE * sizeof(uint16_t)]); - uint8_t *second_pred = get_buf_by_bd(xd, second_pred16); - (void)ref_mv_sub8x8; - - const int have_newmv = have_nearmv_in_inter_mode(mbmi->mode); - const int ref_mv_idx = mbmi->ref_mv_idx + (have_newmv ? 1 : 0); - MV *const best_mv = &x->best_mv.as_mv; - const int search_range = SEARCH_RANGE_8P; - const int sadpb = x->sadperbit16; - // Allow joint search multiple times iteratively for each reference frame - // and break out of the search loop if it couldn't find a better mv. - for (ite = 0; ite < 4; ite++) { - struct buf_2d ref_yv12[2]; - int bestsme = INT_MAX; - MvLimits tmp_mv_limits = x->mv_limits; - int id = ite % 2; // Even iterations search in the first reference frame, - // odd iterations search in the second. The predictor - // found for the 'other' reference frame is factored in. - if (ite >= 2 && cur_mv[!id].as_int == init_mv[!id].as_int) { - if (cur_mv[id].as_int == init_mv[id].as_int) { - break; - } else { - int_mv cur_int_mv, init_int_mv; - cur_int_mv.as_mv.col = cur_mv[id].as_mv.col >> 3; - cur_int_mv.as_mv.row = cur_mv[id].as_mv.col >> 3; - init_int_mv.as_mv.row = init_mv[id].as_mv.row >> 3; - init_int_mv.as_mv.col = init_mv[id].as_mv.col >> 3; - if (cur_int_mv.as_int == init_int_mv.as_int) { - break; - } - } - } - for (ref = 0; ref < 2; ++ref) { - ref_mv[ref] = av1_get_ref_mv(x, ref); - // Swap out the reference frame for a version that's been scaled to - // match the resolution of the current frame, allowing the existing - // motion search code to be used without additional modifications. 
- if (scaled_ref_frame[ref]) { - int i; - for (i = 0; i < num_planes; i++) - backup_yv12[ref][i] = xd->plane[i].pre[ref]; - av1_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col, - NULL, num_planes); - } - } - - assert(IMPLIES(scaled_ref_frame[0] != NULL, - cm->width == scaled_ref_frame[0]->y_crop_width && - cm->height == scaled_ref_frame[0]->y_crop_height)); - assert(IMPLIES(scaled_ref_frame[1] != NULL, - cm->width == scaled_ref_frame[1]->y_crop_width && - cm->height == scaled_ref_frame[1]->y_crop_height)); - - // Initialize based on (possibly scaled) prediction buffers. - ref_yv12[0] = xd->plane[plane].pre[0]; - ref_yv12[1] = xd->plane[plane].pre[1]; - - // Get the prediction block from the 'other' reference frame. - const InterpFilters interp_filters = EIGHTTAP_REGULAR; - - // Since we have scaled the reference frames to match the size of the - // current frame we must use a unit scaling factor during mode selection. - av1_build_inter_predictor(ref_yv12[!id].buf, ref_yv12[!id].stride, - second_pred, pw, &cur_mv[!id].as_mv, - &cm->sf_identity, pw, ph, &conv_params, - interp_filters, &warp_types[!id], p_col, p_row, - plane, !id, MV_PRECISION_Q3, mi_col * MI_SIZE, - mi_row * MI_SIZE, xd, cm->allow_warped_motion); - - const int order_idx = id != 0; - av1_jnt_comp_weight_assign(cm, mbmi, order_idx, &xd->jcp_param.fwd_offset, - &xd->jcp_param.bck_offset, - &xd->jcp_param.use_jnt_comp_avg, 1); - - // Do full-pixel compound motion search on the current reference frame. - if (id) xd->plane[plane].pre[0] = ref_yv12[id]; - av1_set_mv_search_range(&x->mv_limits, &ref_mv[id].as_mv); - - // Use the mv result from the single mode as mv predictor. - *best_mv = cur_mv[id].as_mv; - - best_mv->col >>= 3; - best_mv->row >>= 3; - - av1_set_mvcost(x, id, ref_mv_idx); - - // Small-range full-pixel motion search. 
- bestsme = av1_refining_search_8p_c(x, sadpb, search_range, - &cpi->fn_ptr[bsize], mask, mask_stride, - id, &ref_mv[id].as_mv, second_pred); - if (bestsme < INT_MAX) { - if (mask) - bestsme = av1_get_mvpred_mask_var(x, best_mv, &ref_mv[id].as_mv, - second_pred, mask, mask_stride, id, - &cpi->fn_ptr[bsize], 1); - else - bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv[id].as_mv, - second_pred, &cpi->fn_ptr[bsize], 1); - } - - x->mv_limits = tmp_mv_limits; - - // Restore the pointer to the first (possibly scaled) prediction buffer. - if (id) xd->plane[plane].pre[0] = ref_yv12[0]; - - for (ref = 0; ref < 2; ++ref) { - if (scaled_ref_frame[ref]) { - // Swap back the original buffers for subpel motion search. - for (int i = 0; i < num_planes; i++) { - xd->plane[i].pre[ref] = backup_yv12[ref][i]; - } - // Re-initialize based on unscaled prediction buffers. - ref_yv12[ref] = xd->plane[plane].pre[ref]; - } - } - - // Do sub-pixel compound motion search on the current reference frame. - if (id) xd->plane[plane].pre[0] = ref_yv12[id]; - - if (cpi->common.cur_frame_force_integer_mv) { - x->best_mv.as_mv.row *= 8; - x->best_mv.as_mv.col *= 8; - } - if (bestsme < INT_MAX && cpi->common.cur_frame_force_integer_mv == 0) { - int dis; /* TODO: use dis in distortion calculation later. */ - unsigned int sse; - bestsme = cpi->find_fractional_mv_step( - x, cm, mi_row, mi_col, &ref_mv[id].as_mv, - cpi->common.allow_high_precision_mv, x->errorperbit, - &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL, - x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask, - mask_stride, id, pw, ph, cpi->sf.use_accurate_subpel_search); - } - - // Restore the pointer to the first prediction buffer. 
- if (id) xd->plane[plane].pre[0] = ref_yv12[0]; - if (bestsme < last_besterr[id]) { - cur_mv[id].as_mv = *best_mv; - last_besterr[id] = bestsme; - } else { - break; - } - } - - *rate_mv = 0; - - for (ref = 0; ref < 2; ++ref) { - av1_set_mvcost(x, ref, ref_mv_idx); - const int_mv curr_ref_mv = av1_get_ref_mv(x, ref); - *rate_mv += av1_mv_bit_cost(&cur_mv[ref].as_mv, &curr_ref_mv.as_mv, - x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); - } -} - -static void estimate_ref_frame_costs( - const AV1_COMMON *cm, const MACROBLOCKD *xd, const MACROBLOCK *x, - int segment_id, unsigned int *ref_costs_single, - unsigned int (*ref_costs_comp)[REF_FRAMES]) { - int seg_ref_active = - segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME); - if (seg_ref_active) { - memset(ref_costs_single, 0, REF_FRAMES * sizeof(*ref_costs_single)); - int ref_frame; - for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) - memset(ref_costs_comp[ref_frame], 0, - REF_FRAMES * sizeof((*ref_costs_comp)[0])); - } else { - int intra_inter_ctx = av1_get_intra_inter_context(xd); - ref_costs_single[INTRA_FRAME] = x->intra_inter_cost[intra_inter_ctx][0]; - unsigned int base_cost = x->intra_inter_cost[intra_inter_ctx][1]; - - for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i) - ref_costs_single[i] = base_cost; - - const int ctx_p1 = av1_get_pred_context_single_ref_p1(xd); - const int ctx_p2 = av1_get_pred_context_single_ref_p2(xd); - const int ctx_p3 = av1_get_pred_context_single_ref_p3(xd); - const int ctx_p4 = av1_get_pred_context_single_ref_p4(xd); - const int ctx_p5 = av1_get_pred_context_single_ref_p5(xd); - const int ctx_p6 = av1_get_pred_context_single_ref_p6(xd); - - // Determine cost of a single ref frame, where frame types are represented - // by a tree: - // Level 0: add cost whether this ref is a forward or backward ref - ref_costs_single[LAST_FRAME] += x->single_ref_cost[ctx_p1][0][0]; - ref_costs_single[LAST2_FRAME] += x->single_ref_cost[ctx_p1][0][0]; - ref_costs_single[LAST3_FRAME] += 
x->single_ref_cost[ctx_p1][0][0]; - ref_costs_single[GOLDEN_FRAME] += x->single_ref_cost[ctx_p1][0][0]; - ref_costs_single[BWDREF_FRAME] += x->single_ref_cost[ctx_p1][0][1]; - ref_costs_single[ALTREF2_FRAME] += x->single_ref_cost[ctx_p1][0][1]; - ref_costs_single[ALTREF_FRAME] += x->single_ref_cost[ctx_p1][0][1]; - - // Level 1: if this ref is forward ref, - // add cost whether it is last/last2 or last3/golden - ref_costs_single[LAST_FRAME] += x->single_ref_cost[ctx_p3][2][0]; - ref_costs_single[LAST2_FRAME] += x->single_ref_cost[ctx_p3][2][0]; - ref_costs_single[LAST3_FRAME] += x->single_ref_cost[ctx_p3][2][1]; - ref_costs_single[GOLDEN_FRAME] += x->single_ref_cost[ctx_p3][2][1]; - - // Level 1: if this ref is backward ref - // then add cost whether this ref is altref or backward ref - ref_costs_single[BWDREF_FRAME] += x->single_ref_cost[ctx_p2][1][0]; - ref_costs_single[ALTREF2_FRAME] += x->single_ref_cost[ctx_p2][1][0]; - ref_costs_single[ALTREF_FRAME] += x->single_ref_cost[ctx_p2][1][1]; - - // Level 2: further add cost whether this ref is last or last2 - ref_costs_single[LAST_FRAME] += x->single_ref_cost[ctx_p4][3][0]; - ref_costs_single[LAST2_FRAME] += x->single_ref_cost[ctx_p4][3][1]; - - // Level 2: last3 or golden - ref_costs_single[LAST3_FRAME] += x->single_ref_cost[ctx_p5][4][0]; - ref_costs_single[GOLDEN_FRAME] += x->single_ref_cost[ctx_p5][4][1]; - - // Level 2: bwdref or altref2 - ref_costs_single[BWDREF_FRAME] += x->single_ref_cost[ctx_p6][5][0]; - ref_costs_single[ALTREF2_FRAME] += x->single_ref_cost[ctx_p6][5][1]; - - if (cm->reference_mode != SINGLE_REFERENCE) { - // Similar to single ref, determine cost of compound ref frames. 
- // cost_compound_refs = cost_first_ref + cost_second_ref - const int bwdref_comp_ctx_p = av1_get_pred_context_comp_bwdref_p(xd); - const int bwdref_comp_ctx_p1 = av1_get_pred_context_comp_bwdref_p1(xd); - const int ref_comp_ctx_p = av1_get_pred_context_comp_ref_p(xd); - const int ref_comp_ctx_p1 = av1_get_pred_context_comp_ref_p1(xd); - const int ref_comp_ctx_p2 = av1_get_pred_context_comp_ref_p2(xd); - - const int comp_ref_type_ctx = av1_get_comp_reference_type_context(xd); - unsigned int ref_bicomp_costs[REF_FRAMES] = { 0 }; - - ref_bicomp_costs[LAST_FRAME] = ref_bicomp_costs[LAST2_FRAME] = - ref_bicomp_costs[LAST3_FRAME] = ref_bicomp_costs[GOLDEN_FRAME] = - base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][1]; - ref_bicomp_costs[BWDREF_FRAME] = ref_bicomp_costs[ALTREF2_FRAME] = 0; - ref_bicomp_costs[ALTREF_FRAME] = 0; - - // cost of first ref frame - ref_bicomp_costs[LAST_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][0]; - ref_bicomp_costs[LAST2_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][0]; - ref_bicomp_costs[LAST3_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][1]; - ref_bicomp_costs[GOLDEN_FRAME] += x->comp_ref_cost[ref_comp_ctx_p][0][1]; - - ref_bicomp_costs[LAST_FRAME] += x->comp_ref_cost[ref_comp_ctx_p1][1][0]; - ref_bicomp_costs[LAST2_FRAME] += x->comp_ref_cost[ref_comp_ctx_p1][1][1]; - - ref_bicomp_costs[LAST3_FRAME] += x->comp_ref_cost[ref_comp_ctx_p2][2][0]; - ref_bicomp_costs[GOLDEN_FRAME] += x->comp_ref_cost[ref_comp_ctx_p2][2][1]; - - // cost of second ref frame - ref_bicomp_costs[BWDREF_FRAME] += - x->comp_bwdref_cost[bwdref_comp_ctx_p][0][0]; - ref_bicomp_costs[ALTREF2_FRAME] += - x->comp_bwdref_cost[bwdref_comp_ctx_p][0][0]; - ref_bicomp_costs[ALTREF_FRAME] += - x->comp_bwdref_cost[bwdref_comp_ctx_p][0][1]; - - ref_bicomp_costs[BWDREF_FRAME] += - x->comp_bwdref_cost[bwdref_comp_ctx_p1][1][0]; - ref_bicomp_costs[ALTREF2_FRAME] += - x->comp_bwdref_cost[bwdref_comp_ctx_p1][1][1]; - - // cost: if one ref frame is forward ref, the other ref is 
backward ref - int ref0, ref1; - for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) { - for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) { - ref_costs_comp[ref0][ref1] = - ref_bicomp_costs[ref0] + ref_bicomp_costs[ref1]; - } - } - - // cost: if both ref frames are the same side. - const int uni_comp_ref_ctx_p = av1_get_pred_context_uni_comp_ref_p(xd); - const int uni_comp_ref_ctx_p1 = av1_get_pred_context_uni_comp_ref_p1(xd); - const int uni_comp_ref_ctx_p2 = av1_get_pred_context_uni_comp_ref_p2(xd); - ref_costs_comp[LAST_FRAME][LAST2_FRAME] = - base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][0] + - x->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] + - x->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][0]; - ref_costs_comp[LAST_FRAME][LAST3_FRAME] = - base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][0] + - x->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] + - x->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] + - x->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][0]; - ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = - base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][0] + - x->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][0] + - x->uni_comp_ref_cost[uni_comp_ref_ctx_p1][1][1] + - x->uni_comp_ref_cost[uni_comp_ref_ctx_p2][2][1]; - ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = - base_cost + x->comp_ref_type_cost[comp_ref_type_ctx][0] + - x->uni_comp_ref_cost[uni_comp_ref_ctx_p][0][1]; - } else { - int ref0, ref1; - for (ref0 = LAST_FRAME; ref0 <= GOLDEN_FRAME; ++ref0) { - for (ref1 = BWDREF_FRAME; ref1 <= ALTREF_FRAME; ++ref1) - ref_costs_comp[ref0][ref1] = 512; - } - ref_costs_comp[LAST_FRAME][LAST2_FRAME] = 512; - ref_costs_comp[LAST_FRAME][LAST3_FRAME] = 512; - ref_costs_comp[LAST_FRAME][GOLDEN_FRAME] = 512; - ref_costs_comp[BWDREF_FRAME][ALTREF_FRAME] = 512; - } - } -} - -static void store_coding_context(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, - int mode_index, - int64_t comp_pred_diff[REFERENCE_MODES], - int skippable) { - MACROBLOCKD *const xd = &x->e_mbd; - - // Take a 
snapshot of the coding context so it can be - // restored if we decide to encode this way - ctx->skip = x->skip; - ctx->skippable = skippable; - ctx->best_mode_index = mode_index; - ctx->mic = *xd->mi[0]; - ctx->mbmi_ext = *x->mbmi_ext; - ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE]; - ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE]; - ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT]; -} - -static void setup_buffer_ref_mvs_inter( - const AV1_COMP *const cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame, - BLOCK_SIZE block_size, int mi_row, int mi_col, - struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) { - const AV1_COMMON *cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame); - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf; - MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; - - assert(yv12 != NULL); - - // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this - // use the UV scaling factors. - av1_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf, - num_planes); - - // Gets an initial list of candidate vectors from neighbours and orders them - av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count, - mbmi_ext->ref_mv_stack, NULL, mbmi_ext->global_mvs, mi_row, - mi_col, mbmi_ext->mode_context); - - // Further refinement that is encode side only to test the top few candidates - // in full and choose the best as the centre point for subsequent searches. - // The current implementation doesn't support scaling. 
- (void)block_size; - av1_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame, - block_size); -} - -static void single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x, - BLOCK_SIZE bsize, int mi_row, int mi_col, - int ref_idx, int *rate_mv) { - MACROBLOCKD *xd = &x->e_mbd; - const AV1_COMMON *cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - MB_MODE_INFO *mbmi = xd->mi[0]; - struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } }; - int bestsme = INT_MAX; - int step_param; - int sadpb = x->sadperbit16; - MV mvp_full; - int ref = mbmi->ref_frame[ref_idx]; - MV ref_mv = av1_get_ref_mv(x, ref_idx).as_mv; - - MvLimits tmp_mv_limits = x->mv_limits; - int cost_list[5]; - - const YV12_BUFFER_CONFIG *scaled_ref_frame = - av1_get_scaled_ref_frame(cpi, ref); - - if (scaled_ref_frame) { - // Swap out the reference frame for a version that's been scaled to - // match the resolution of the current frame, allowing the existing - // full-pixel motion search code to be used without additional - // modifications. - for (int i = 0; i < num_planes; i++) { - backup_yv12[i] = xd->plane[i].pre[ref_idx]; - } - av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL, - num_planes); - } - - av1_set_mvcost( - x, ref_idx, - mbmi->ref_mv_idx + (have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0)); - - // Work out the size of the first step in the mv step search. - // 0 here is maximum length first step. 1 is AOMMAX >> 1 etc. - if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) { - // Take the weighted average of the step_params based on the last frame's - // max mv magnitude and that based on the best ref mvs of the current - // block for the given reference. 
- step_param = - (av1_init_search_range(x->max_mv_context[ref]) + cpi->mv_step_param) / - 2; - } else { - step_param = cpi->mv_step_param; - } - - if (cpi->sf.adaptive_motion_search && bsize < cm->seq_params.sb_size) { - int boffset = - 2 * (mi_size_wide_log2[cm->seq_params.sb_size] - - AOMMIN(mi_size_high_log2[bsize], mi_size_wide_log2[bsize])); - step_param = AOMMAX(step_param, boffset); - } - - if (cpi->sf.adaptive_motion_search) { - int bwl = mi_size_wide_log2[bsize]; - int bhl = mi_size_high_log2[bsize]; - int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4); - - if (tlevel < 5) { - step_param += 2; - step_param = AOMMIN(step_param, MAX_MVSEARCH_STEPS - 1); - } - - // prev_mv_sad is not setup for dynamically scaled frames. - if (cpi->oxcf.resize_mode != RESIZE_RANDOM) { - int i; - for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) { - if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) { - x->pred_mv[ref].row = 0; - x->pred_mv[ref].col = 0; - x->best_mv.as_int = INVALID_MV; - - if (scaled_ref_frame) { - // Swap back the original buffers before returning. - for (int j = 0; j < num_planes; ++j) - xd->plane[j].pre[ref_idx] = backup_yv12[j]; - } - return; - } - } - } - } - - // Note: MV limits are modified here. Always restore the original values - // after full-pixel motion search. 
- av1_set_mv_search_range(&x->mv_limits, &ref_mv); - - if (mbmi->motion_mode != SIMPLE_TRANSLATION) - mvp_full = mbmi->mv[0].as_mv; - else - mvp_full = ref_mv; - - mvp_full.col >>= 3; - mvp_full.row >>= 3; - - x->best_mv.as_int = x->second_best_mv.as_int = INVALID_MV; - - switch (mbmi->motion_mode) { - case SIMPLE_TRANSLATION: - bestsme = av1_full_pixel_search( - cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, 0, - sadpb, cond_cost_list(cpi, cost_list), &ref_mv, INT_MAX, 1, - (MI_SIZE * mi_col), (MI_SIZE * mi_row), 0); - break; - case OBMC_CAUSAL: - bestsme = av1_obmc_full_pixel_search(cpi, x, &mvp_full, step_param, sadpb, - MAX_MVSEARCH_STEPS - 1 - step_param, - 1, &cpi->fn_ptr[bsize], &ref_mv, - &(x->best_mv.as_mv), 0); - break; - default: assert(0 && "Invalid motion mode!\n"); - } - - if (scaled_ref_frame) { - // Swap back the original buffers for subpel motion search. - for (int i = 0; i < num_planes; i++) { - xd->plane[i].pre[ref_idx] = backup_yv12[i]; - } - } - - x->mv_limits = tmp_mv_limits; - - if (cpi->common.cur_frame_force_integer_mv) { - x->best_mv.as_mv.row *= 8; - x->best_mv.as_mv.col *= 8; - } - const int use_fractional_mv = - bestsme < INT_MAX && cpi->common.cur_frame_force_integer_mv == 0; - if (use_fractional_mv) { - int dis; /* TODO: use dis in distortion calculation later. 
*/ - switch (mbmi->motion_mode) { - case SIMPLE_TRANSLATION: - if (cpi->sf.use_accurate_subpel_search) { - int best_mv_var; - const int try_second = x->second_best_mv.as_int != INVALID_MV && - x->second_best_mv.as_int != x->best_mv.as_int; - const int pw = block_size_wide[bsize]; - const int ph = block_size_high[bsize]; - - best_mv_var = cpi->find_fractional_mv_step( - x, cm, mi_row, mi_col, &ref_mv, cm->allow_high_precision_mv, - x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop, - cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list), - x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, NULL, - 0, 0, pw, ph, 1); - - if (try_second) { - const int minc = - AOMMAX(x->mv_limits.col_min * 8, ref_mv.col - MV_MAX); - const int maxc = - AOMMIN(x->mv_limits.col_max * 8, ref_mv.col + MV_MAX); - const int minr = - AOMMAX(x->mv_limits.row_min * 8, ref_mv.row - MV_MAX); - const int maxr = - AOMMIN(x->mv_limits.row_max * 8, ref_mv.row + MV_MAX); - int this_var; - MV best_mv = x->best_mv.as_mv; - - x->best_mv = x->second_best_mv; - if (x->best_mv.as_mv.row * 8 <= maxr && - x->best_mv.as_mv.row * 8 >= minr && - x->best_mv.as_mv.col * 8 <= maxc && - x->best_mv.as_mv.col * 8 >= minc) { - this_var = cpi->find_fractional_mv_step( - x, cm, mi_row, mi_col, &ref_mv, cm->allow_high_precision_mv, - x->errorperbit, &cpi->fn_ptr[bsize], - cpi->sf.mv.subpel_force_stop, - cpi->sf.mv.subpel_iters_per_step, - cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost, - &dis, &x->pred_sse[ref], NULL, NULL, 0, 0, pw, ph, 1); - if (this_var < best_mv_var) best_mv = x->best_mv.as_mv; - x->best_mv.as_mv = best_mv; - } - } - } else { - cpi->find_fractional_mv_step( - x, cm, mi_row, mi_col, &ref_mv, cm->allow_high_precision_mv, - x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop, - cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list), - x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, NULL, - 0, 0, 0, 0, 0); - } - break; - case 
OBMC_CAUSAL: - av1_find_best_obmc_sub_pixel_tree_up( - x, cm, mi_row, mi_col, &x->best_mv.as_mv, &ref_mv, - cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], - cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_iters_per_step, - x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], 0, - cpi->sf.use_accurate_subpel_search); - break; - default: assert(0 && "Invalid motion mode!\n"); - } - } - *rate_mv = av1_mv_bit_cost(&x->best_mv.as_mv, &ref_mv, x->nmvjointcost, - x->mvcost, MV_COST_WEIGHT); - - if (cpi->sf.adaptive_motion_search && mbmi->motion_mode == SIMPLE_TRANSLATION) - x->pred_mv[ref] = x->best_mv.as_mv; -} - -static INLINE void restore_dst_buf(MACROBLOCKD *xd, BUFFER_SET dst, - const int num_planes) { - int i; - for (i = 0; i < num_planes; i++) { - xd->plane[i].dst.buf = dst.plane[i]; - xd->plane[i].dst.stride = dst.stride[i]; - } -} - -static void build_second_inter_pred(const AV1_COMP *cpi, MACROBLOCK *x, - BLOCK_SIZE bsize, const MV *other_mv, - int mi_row, int mi_col, const int block, - int ref_idx, uint8_t *second_pred) { - const AV1_COMMON *const cm = &cpi->common; - const int pw = block_size_wide[bsize]; - const int ph = block_size_high[bsize]; - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = xd->mi[0]; - const int other_ref = mbmi->ref_frame[!ref_idx]; - struct macroblockd_plane *const pd = &xd->plane[0]; - // ic and ir are the 4x4 coordinates of the sub8x8 at index "block" - const int ic = block & 1; - const int ir = (block - ic) >> 1; - const int p_col = ((mi_col * MI_SIZE) >> pd->subsampling_x) + 4 * ic; - const int p_row = ((mi_row * MI_SIZE) >> pd->subsampling_y) + 4 * ir; - const WarpedMotionParams *const wm = &xd->global_motion[other_ref]; - int is_global = is_global_mv_block(xd->mi[0], wm->wmtype); - - // This function should only ever be called for compound modes - assert(has_second_ref(mbmi)); - - const int plane = 0; - struct buf_2d ref_yv12 = xd->plane[plane].pre[!ref_idx]; - - struct scale_factors sf; - 
av1_setup_scale_factors_for_frame(&sf, ref_yv12.width, ref_yv12.height, - cm->width, cm->height); - - ConvolveParams conv_params = get_conv_params(0, plane, xd->bd); - WarpTypesAllowed warp_types; - warp_types.global_warp_allowed = is_global; - warp_types.local_warp_allowed = mbmi->motion_mode == WARPED_CAUSAL; - - // Get the prediction block from the 'other' reference frame. - av1_build_inter_predictor(ref_yv12.buf, ref_yv12.stride, second_pred, pw, - other_mv, &sf, pw, ph, &conv_params, - mbmi->interp_filters, &warp_types, p_col, p_row, - plane, !ref_idx, MV_PRECISION_Q3, mi_col * MI_SIZE, - mi_row * MI_SIZE, xd, cm->allow_warped_motion); - - av1_jnt_comp_weight_assign(cm, mbmi, 0, &xd->jcp_param.fwd_offset, - &xd->jcp_param.bck_offset, - &xd->jcp_param.use_jnt_comp_avg, 1); -} - -// Search for the best mv for one component of a compound, -// given that the other component is fixed. -static void compound_single_motion_search(const AV1_COMP *cpi, MACROBLOCK *x, - BLOCK_SIZE bsize, MV *this_mv, - int mi_row, int mi_col, - const uint8_t *second_pred, - const uint8_t *mask, int mask_stride, - int *rate_mv, int ref_idx) { - const AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - const int pw = block_size_wide[bsize]; - const int ph = block_size_high[bsize]; - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = xd->mi[0]; - const int ref = mbmi->ref_frame[ref_idx]; - const int_mv ref_mv = av1_get_ref_mv(x, ref_idx); - struct macroblockd_plane *const pd = &xd->plane[0]; - - struct buf_2d backup_yv12[MAX_MB_PLANE]; - const YV12_BUFFER_CONFIG *const scaled_ref_frame = - av1_get_scaled_ref_frame(cpi, ref); - - // Check that this is either an interinter or an interintra block - assert(has_second_ref(mbmi) || (ref_idx == 0 && is_interintra_mode(mbmi))); - - // Store the first prediction buffer. 
- struct buf_2d orig_yv12; - if (ref_idx) { - orig_yv12 = pd->pre[0]; - pd->pre[0] = pd->pre[ref_idx]; - } - - if (scaled_ref_frame) { - int i; - // Swap out the reference frame for a version that's been scaled to - // match the resolution of the current frame, allowing the existing - // full-pixel motion search code to be used without additional - // modifications. - for (i = 0; i < num_planes; i++) backup_yv12[i] = xd->plane[i].pre[ref_idx]; - av1_setup_pre_planes(xd, ref_idx, scaled_ref_frame, mi_row, mi_col, NULL, - num_planes); - } - - int bestsme = INT_MAX; - int sadpb = x->sadperbit16; - MV *const best_mv = &x->best_mv.as_mv; - int search_range = SEARCH_RANGE_8P; - - MvLimits tmp_mv_limits = x->mv_limits; - - // Do compound motion search on the current reference frame. - av1_set_mv_search_range(&x->mv_limits, &ref_mv.as_mv); - - // Use the mv result from the single mode as mv predictor. - *best_mv = *this_mv; - - best_mv->col >>= 3; - best_mv->row >>= 3; - - av1_set_mvcost( - x, ref_idx, - mbmi->ref_mv_idx + (have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0)); - - // Small-range full-pixel motion search. - bestsme = av1_refining_search_8p_c(x, sadpb, search_range, - &cpi->fn_ptr[bsize], mask, mask_stride, - ref_idx, &ref_mv.as_mv, second_pred); - if (bestsme < INT_MAX) { - if (mask) - bestsme = - av1_get_mvpred_mask_var(x, best_mv, &ref_mv.as_mv, second_pred, mask, - mask_stride, ref_idx, &cpi->fn_ptr[bsize], 1); - else - bestsme = av1_get_mvpred_av_var(x, best_mv, &ref_mv.as_mv, second_pred, - &cpi->fn_ptr[bsize], 1); - } - - x->mv_limits = tmp_mv_limits; - - if (scaled_ref_frame) { - // Swap back the original buffers for subpel motion search. 
- for (int i = 0; i < num_planes; i++) { - xd->plane[i].pre[ref_idx] = backup_yv12[i]; - } - } - - if (cpi->common.cur_frame_force_integer_mv) { - x->best_mv.as_mv.row *= 8; - x->best_mv.as_mv.col *= 8; - } - const int use_fractional_mv = - bestsme < INT_MAX && cpi->common.cur_frame_force_integer_mv == 0; - if (use_fractional_mv) { - int dis; /* TODO: use dis in distortion calculation later. */ - unsigned int sse; - bestsme = cpi->find_fractional_mv_step( - x, cm, mi_row, mi_col, &ref_mv.as_mv, - cpi->common.allow_high_precision_mv, x->errorperbit, - &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_iters_per_step, NULL, - x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, mask, mask_stride, - ref_idx, pw, ph, cpi->sf.use_accurate_subpel_search); - } - - // Restore the pointer to the first unscaled prediction buffer. - if (ref_idx) pd->pre[0] = orig_yv12; - - if (bestsme < INT_MAX) *this_mv = *best_mv; - - *rate_mv = 0; - - av1_set_mvcost( - x, ref_idx, - mbmi->ref_mv_idx + (have_nearmv_in_inter_mode(mbmi->mode) ? 1 : 0)); - *rate_mv += av1_mv_bit_cost(this_mv, &ref_mv.as_mv, x->nmvjointcost, - x->mvcost, MV_COST_WEIGHT); -} - -// Wrapper for compound_single_motion_search, for the common case -// where the second prediction is also an inter mode. -static void compound_single_motion_search_interinter( - const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int_mv *cur_mv, - int mi_row, int mi_col, const uint8_t *mask, int mask_stride, int *rate_mv, - const int block, int ref_idx) { - MACROBLOCKD *xd = &x->e_mbd; - // This function should only ever be called for compound modes - assert(has_second_ref(xd->mi[0])); - - // Prediction buffer from second frame. 
- DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[MAX_SB_SQUARE]); - uint8_t *second_pred; - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) - second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16); - else - second_pred = (uint8_t *)second_pred_alloc_16; - - MV *this_mv = &cur_mv[ref_idx].as_mv; - const MV *other_mv = &cur_mv[!ref_idx].as_mv; - - build_second_inter_pred(cpi, x, bsize, other_mv, mi_row, mi_col, block, - ref_idx, second_pred); - - compound_single_motion_search(cpi, x, bsize, this_mv, mi_row, mi_col, - second_pred, mask, mask_stride, rate_mv, - ref_idx); -} - -static void do_masked_motion_search_indexed( - const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv, - const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE bsize, - int mi_row, int mi_col, int_mv *tmp_mv, int *rate_mv, int which) { - // NOTE: which values: 0 - 0 only, 1 - 1 only, 2 - both - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = xd->mi[0]; - BLOCK_SIZE sb_type = mbmi->sb_type; - const uint8_t *mask; - const int mask_stride = block_size_wide[bsize]; - - mask = av1_get_compound_type_mask(comp_data, sb_type); - - tmp_mv[0].as_int = cur_mv[0].as_int; - tmp_mv[1].as_int = cur_mv[1].as_int; - if (which == 0 || which == 1) { - compound_single_motion_search_interinter(cpi, x, bsize, tmp_mv, mi_row, - mi_col, mask, mask_stride, rate_mv, - 0, which); - } else if (which == 2) { - joint_motion_search(cpi, x, bsize, tmp_mv, mi_row, mi_col, NULL, mask, - mask_stride, rate_mv, 0); - } -} - -#define USE_DISCOUNT_NEWMV_TEST 0 -#if USE_DISCOUNT_NEWMV_TEST -// In some situations we want to discount the apparent cost of a new motion -// vector. Where there is a subtle motion field and especially where there is -// low spatial complexity then it can be hard to cover the cost of a new motion -// vector in a single block, even if that motion vector reduces distortion. 
-// However, once established that vector may be usable through the nearest and -// near mv modes to reduce distortion in subsequent blocks and also improve -// visual quality. -#define NEW_MV_DISCOUNT_FACTOR 8 -static INLINE void get_this_mv(int_mv *this_mv, PREDICTION_MODE this_mode, - int ref_idx, int ref_mv_idx, - const MV_REFERENCE_FRAME *ref_frame, - const MB_MODE_INFO_EXT *mbmi_ext); -static int discount_newmv_test(const AV1_COMP *const cpi, const MACROBLOCK *x, - PREDICTION_MODE this_mode, int_mv this_mv) { - if (this_mode == NEWMV && this_mv.as_int != 0 && - !cpi->rc.is_src_frame_alt_ref) { - // Only discount new_mv when nearst_mv and all near_mv are zero, and the - // new_mv is not equal to global_mv - const AV1_COMMON *const cm = &cpi->common; - const MACROBLOCKD *const xd = &x->e_mbd; - const MB_MODE_INFO *const mbmi = xd->mi[0]; - const MV_REFERENCE_FRAME tmp_ref_frames[2] = { mbmi->ref_frame[0], - NONE_FRAME }; - const uint8_t ref_frame_type = av1_ref_frame_type(tmp_ref_frames); - int_mv nearest_mv; - get_this_mv(&nearest_mv, NEARESTMV, 0, 0, tmp_ref_frames, x->mbmi_ext); - int ret = nearest_mv.as_int == 0; - for (int ref_mv_idx = 0; - ref_mv_idx < x->mbmi_ext->ref_mv_count[ref_frame_type]; ++ref_mv_idx) { - int_mv near_mv; - get_this_mv(&near_mv, NEARMV, 0, ref_mv_idx, tmp_ref_frames, x->mbmi_ext); - ret &= near_mv.as_int == 0; - } - if (cm->global_motion[tmp_ref_frames[0]].wmtype <= TRANSLATION) { - int_mv global_mv; - get_this_mv(&global_mv, GLOBALMV, 0, 0, tmp_ref_frames, x->mbmi_ext); - ret &= global_mv.as_int != this_mv.as_int; - } - return ret; - } - return 0; -} -#endif - -#define LEFT_TOP_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3) -#define RIGHT_BOTTOM_MARGIN ((AOM_BORDER_IN_PIXELS - AOM_INTERP_EXTEND) << 3) - -// TODO(jingning): this mv clamping function should be block size dependent. 
-static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) { - clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN, - xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN, - xd->mb_to_top_edge - LEFT_TOP_MARGIN, - xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN); -} - -static int estimate_wedge_sign(const AV1_COMP *cpi, const MACROBLOCK *x, - const BLOCK_SIZE bsize, const uint8_t *pred0, - int stride0, const uint8_t *pred1, int stride1) { - static const BLOCK_SIZE split_qtr[BLOCK_SIZES_ALL] = { - // 4X4 - BLOCK_INVALID, - // 4X8, 8X4, 8X8 - BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X4, - // 8X16, 16X8, 16X16 - BLOCK_4X8, BLOCK_8X4, BLOCK_8X8, - // 16X32, 32X16, 32X32 - BLOCK_8X16, BLOCK_16X8, BLOCK_16X16, - // 32X64, 64X32, 64X64 - BLOCK_16X32, BLOCK_32X16, BLOCK_32X32, - // 64x128, 128x64, 128x128 - BLOCK_32X64, BLOCK_64X32, BLOCK_64X64, - // 4X16, 16X4, 8X32 - BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X16, - // 32X8, 16X64, 64X16 - BLOCK_16X4, BLOCK_8X32, BLOCK_32X8 - }; - const struct macroblock_plane *const p = &x->plane[0]; - const uint8_t *src = p->src.buf; - int src_stride = p->src.stride; - const int bw = block_size_wide[bsize]; - const int bh = block_size_high[bsize]; - uint32_t esq[2][4]; - int64_t tl, br; - - const BLOCK_SIZE f_index = split_qtr[bsize]; - assert(f_index != BLOCK_INVALID); - - if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - pred0 = CONVERT_TO_BYTEPTR(pred0); - pred1 = CONVERT_TO_BYTEPTR(pred1); - } - - cpi->fn_ptr[f_index].vf(src, src_stride, pred0, stride0, &esq[0][0]); - cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, pred0 + bw / 2, stride0, - &esq[0][1]); - cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride, src_stride, - pred0 + bh / 2 * stride0, stride0, &esq[0][2]); - cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride + bw / 2, src_stride, - pred0 + bh / 2 * stride0 + bw / 2, stride0, - &esq[0][3]); - cpi->fn_ptr[f_index].vf(src, src_stride, pred1, stride1, &esq[1][0]); - cpi->fn_ptr[f_index].vf(src + bw / 2, src_stride, pred1 + bw / 2, 
stride1, - &esq[1][1]); - cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride, src_stride, - pred1 + bh / 2 * stride1, stride0, &esq[1][2]); - cpi->fn_ptr[f_index].vf(src + bh / 2 * src_stride + bw / 2, src_stride, - pred1 + bh / 2 * stride1 + bw / 2, stride0, - &esq[1][3]); - - tl = ((int64_t)esq[0][0] + esq[0][1] + esq[0][2]) - - ((int64_t)esq[1][0] + esq[1][1] + esq[1][2]); - br = ((int64_t)esq[1][3] + esq[1][1] + esq[1][2]) - - ((int64_t)esq[0][3] + esq[0][1] + esq[0][2]); - return (tl + br > 0); -} - -// Choose the best wedge index and sign -static int64_t pick_wedge(const AV1_COMP *const cpi, const MACROBLOCK *const x, - const BLOCK_SIZE bsize, const uint8_t *const p0, - const int16_t *const residual1, - const int16_t *const diff10, - int *const best_wedge_sign, - int *const best_wedge_index) { - const MACROBLOCKD *const xd = &x->e_mbd; - const struct buf_2d *const src = &x->plane[0].src; - const int bw = block_size_wide[bsize]; - const int bh = block_size_high[bsize]; - const int N = bw * bh; - assert(N >= 64); - int rate; - int64_t dist; - int64_t rd, best_rd = INT64_MAX; - int wedge_index; - int wedge_sign; - int wedge_types = (1 << get_wedge_bits_lookup(bsize)); - const uint8_t *mask; - uint64_t sse; - const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH; - const int bd_round = hbd ? 
(xd->bd - 8) * 2 : 0; - - DECLARE_ALIGNED(32, int16_t, residual0[MAX_SB_SQUARE]); // src - pred0 - if (hbd) { - aom_highbd_subtract_block(bh, bw, residual0, bw, src->buf, src->stride, - CONVERT_TO_BYTEPTR(p0), bw, xd->bd); - } else { - aom_subtract_block(bh, bw, residual0, bw, src->buf, src->stride, p0, bw); - } - - int64_t sign_limit = ((int64_t)aom_sum_squares_i16(residual0, N) - - (int64_t)aom_sum_squares_i16(residual1, N)) * - (1 << WEDGE_WEIGHT_BITS) / 2; - int16_t *ds = residual0; - - av1_wedge_compute_delta_squares(ds, residual0, residual1, N); - - for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) { - mask = av1_get_contiguous_soft_mask(wedge_index, 0, bsize); - - wedge_sign = av1_wedge_sign_from_residuals(ds, mask, N, sign_limit); - - mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize); - sse = av1_wedge_sse_from_residuals(residual1, diff10, mask, N); - sse = ROUND_POWER_OF_TWO(sse, bd_round); - - model_rd_sse_fn[MODELRD_TYPE_MASKED_COMPOUND](cpi, x, bsize, 0, sse, N, - &rate, &dist); - // int rate2; - // int64_t dist2; - // model_rd_with_curvfit(cpi, x, bsize, 0, sse, N, &rate2, &dist2); - // printf("sse %"PRId64": leagacy: %d %"PRId64", curvfit %d %"PRId64"\n", - // sse, rate, dist, rate2, dist2); dist = dist2; - // rate = rate2; - - rate += x->wedge_idx_cost[bsize][wedge_index]; - rd = RDCOST(x->rdmult, rate, dist); - - if (rd < best_rd) { - *best_wedge_index = wedge_index; - *best_wedge_sign = wedge_sign; - best_rd = rd; - } - } - - return best_rd - - RDCOST(x->rdmult, x->wedge_idx_cost[bsize][*best_wedge_index], 0); -} - -// Choose the best wedge index the specified sign -static int64_t pick_wedge_fixed_sign(const AV1_COMP *const cpi, - const MACROBLOCK *const x, - const BLOCK_SIZE bsize, - const int16_t *const residual1, - const int16_t *const diff10, - const int wedge_sign, - int *const best_wedge_index) { - const MACROBLOCKD *const xd = &x->e_mbd; - - const int bw = block_size_wide[bsize]; - const int bh = 
block_size_high[bsize]; - const int N = bw * bh; - assert(N >= 64); - int rate; - int64_t dist; - int64_t rd, best_rd = INT64_MAX; - int wedge_index; - int wedge_types = (1 << get_wedge_bits_lookup(bsize)); - const uint8_t *mask; - uint64_t sse; - const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH; - const int bd_round = hbd ? (xd->bd - 8) * 2 : 0; - for (wedge_index = 0; wedge_index < wedge_types; ++wedge_index) { - mask = av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize); - sse = av1_wedge_sse_from_residuals(residual1, diff10, mask, N); - sse = ROUND_POWER_OF_TWO(sse, bd_round); - - model_rd_sse_fn[MODELRD_TYPE_MASKED_COMPOUND](cpi, x, bsize, 0, sse, N, - &rate, &dist); - rate += x->wedge_idx_cost[bsize][wedge_index]; - rd = RDCOST(x->rdmult, rate, dist); - - if (rd < best_rd) { - *best_wedge_index = wedge_index; - best_rd = rd; - } - } - return best_rd - - RDCOST(x->rdmult, x->wedge_idx_cost[bsize][*best_wedge_index], 0); -} - -static int64_t pick_interinter_wedge( - const AV1_COMP *const cpi, MACROBLOCK *const x, const BLOCK_SIZE bsize, - const uint8_t *const p0, const uint8_t *const p1, - const int16_t *const residual1, const int16_t *const diff10) { - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - const int bw = block_size_wide[bsize]; - - int64_t rd; - int wedge_index = -1; - int wedge_sign = 0; - - assert(is_interinter_compound_used(COMPOUND_WEDGE, bsize)); - assert(cpi->common.seq_params.enable_masked_compound); - - if (cpi->sf.fast_wedge_sign_estimate) { - wedge_sign = estimate_wedge_sign(cpi, x, bsize, p0, bw, p1, bw); - rd = pick_wedge_fixed_sign(cpi, x, bsize, residual1, diff10, wedge_sign, - &wedge_index); - } else { - rd = pick_wedge(cpi, x, bsize, p0, residual1, diff10, &wedge_sign, - &wedge_index); - } - - mbmi->interinter_comp.wedge_sign = wedge_sign; - mbmi->interinter_comp.wedge_index = wedge_index; - return rd; -} - -static int64_t pick_interinter_seg(const AV1_COMP *const cpi, - MACROBLOCK 
*const x, const BLOCK_SIZE bsize, - const uint8_t *const p0, - const uint8_t *const p1, - const int16_t *const residual1, - const int16_t *const diff10) { - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - const int bw = block_size_wide[bsize]; - const int bh = block_size_high[bsize]; - const int N = 1 << num_pels_log2_lookup[bsize]; - int rate; - int64_t dist; - DIFFWTD_MASK_TYPE cur_mask_type; - int64_t best_rd = INT64_MAX; - DIFFWTD_MASK_TYPE best_mask_type = 0; - const int hbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH; - const int bd_round = hbd ? (xd->bd - 8) * 2 : 0; - DECLARE_ALIGNED(16, uint8_t, seg_mask[2 * MAX_SB_SQUARE]); - uint8_t *tmp_mask[2] = { xd->seg_mask, seg_mask }; - // try each mask type and its inverse - for (cur_mask_type = 0; cur_mask_type < DIFFWTD_MASK_TYPES; cur_mask_type++) { - // build mask and inverse - if (hbd) - av1_build_compound_diffwtd_mask_highbd( - tmp_mask[cur_mask_type], cur_mask_type, CONVERT_TO_BYTEPTR(p0), bw, - CONVERT_TO_BYTEPTR(p1), bw, bh, bw, xd->bd); - else - av1_build_compound_diffwtd_mask(tmp_mask[cur_mask_type], cur_mask_type, - p0, bw, p1, bw, bh, bw); - - // compute rd for mask - uint64_t sse = av1_wedge_sse_from_residuals(residual1, diff10, - tmp_mask[cur_mask_type], N); - sse = ROUND_POWER_OF_TWO(sse, bd_round); - - model_rd_sse_fn[MODELRD_TYPE_MASKED_COMPOUND](cpi, x, bsize, 0, sse, N, - &rate, &dist); - const int64_t rd0 = RDCOST(x->rdmult, rate, dist); - - if (rd0 < best_rd) { - best_mask_type = cur_mask_type; - best_rd = rd0; - } - } - mbmi->interinter_comp.mask_type = best_mask_type; - if (best_mask_type == DIFFWTD_38_INV) { - memcpy(xd->seg_mask, seg_mask, N * 2); - } - return best_rd; -} - -static int64_t pick_interintra_wedge(const AV1_COMP *const cpi, - const MACROBLOCK *const x, - const BLOCK_SIZE bsize, - const uint8_t *const p0, - const uint8_t *const p1) { - const MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - 
assert(is_interintra_wedge_used(bsize)); - assert(cpi->common.seq_params.enable_interintra_compound); - - const struct buf_2d *const src = &x->plane[0].src; - const int bw = block_size_wide[bsize]; - const int bh = block_size_high[bsize]; - DECLARE_ALIGNED(32, int16_t, residual1[MAX_SB_SQUARE]); // src - pred1 - DECLARE_ALIGNED(32, int16_t, diff10[MAX_SB_SQUARE]); // pred1 - pred0 - if (get_bitdepth_data_path_index(xd)) { - aom_highbd_subtract_block(bh, bw, residual1, bw, src->buf, src->stride, - CONVERT_TO_BYTEPTR(p1), bw, xd->bd); - aom_highbd_subtract_block(bh, bw, diff10, bw, CONVERT_TO_BYTEPTR(p1), bw, - CONVERT_TO_BYTEPTR(p0), bw, xd->bd); - } else { - aom_subtract_block(bh, bw, residual1, bw, src->buf, src->stride, p1, bw); - aom_subtract_block(bh, bw, diff10, bw, p1, bw, p0, bw); - } - int wedge_index = -1; - int64_t rd = - pick_wedge_fixed_sign(cpi, x, bsize, residual1, diff10, 0, &wedge_index); - - mbmi->interintra_wedge_sign = 0; - mbmi->interintra_wedge_index = wedge_index; - return rd; -} - -static int64_t pick_interinter_mask(const AV1_COMP *const cpi, MACROBLOCK *x, - const BLOCK_SIZE bsize, - const uint8_t *const p0, - const uint8_t *const p1, - const int16_t *const residual1, - const int16_t *const diff10) { - const COMPOUND_TYPE compound_type = x->e_mbd.mi[0]->interinter_comp.type; - switch (compound_type) { - case COMPOUND_WEDGE: - return pick_interinter_wedge(cpi, x, bsize, p0, p1, residual1, diff10); - case COMPOUND_DIFFWTD: - return pick_interinter_seg(cpi, x, bsize, p0, p1, residual1, diff10); - default: assert(0); return 0; - } -} - -static int interinter_compound_motion_search(const AV1_COMP *const cpi, - MACROBLOCK *x, - const int_mv *const cur_mv, - const BLOCK_SIZE bsize, - const PREDICTION_MODE this_mode, - int mi_row, int mi_col) { - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - int_mv tmp_mv[2]; - int tmp_rate_mv = 0; - mbmi->interinter_comp.seg_mask = xd->seg_mask; - const INTERINTER_COMPOUND_DATA 
*compound_data = &mbmi->interinter_comp; - - if (this_mode == NEW_NEWMV) { - do_masked_motion_search_indexed(cpi, x, cur_mv, compound_data, bsize, - mi_row, mi_col, tmp_mv, &tmp_rate_mv, 2); - mbmi->mv[0].as_int = tmp_mv[0].as_int; - mbmi->mv[1].as_int = tmp_mv[1].as_int; - } else if (this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV) { - do_masked_motion_search_indexed(cpi, x, cur_mv, compound_data, bsize, - mi_row, mi_col, tmp_mv, &tmp_rate_mv, 0); - mbmi->mv[0].as_int = tmp_mv[0].as_int; - } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) { - do_masked_motion_search_indexed(cpi, x, cur_mv, compound_data, bsize, - mi_row, mi_col, tmp_mv, &tmp_rate_mv, 1); - mbmi->mv[1].as_int = tmp_mv[1].as_int; - } - return tmp_rate_mv; -} - -static void get_inter_predictors_masked_compound( - const AV1_COMP *const cpi, MACROBLOCK *x, const BLOCK_SIZE bsize, - int mi_row, int mi_col, uint8_t **preds0, uint8_t **preds1, - int16_t *residual1, int16_t *diff10, int *strides) { - const AV1_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - const int bw = block_size_wide[bsize]; - const int bh = block_size_high[bsize]; - int can_use_previous = cm->allow_warped_motion; - // get inter predictors to use for masked compound modes - av1_build_inter_predictors_for_planes_single_buf( - xd, bsize, 0, 0, mi_row, mi_col, 0, preds0, strides, can_use_previous); - av1_build_inter_predictors_for_planes_single_buf( - xd, bsize, 0, 0, mi_row, mi_col, 1, preds1, strides, can_use_previous); - const struct buf_2d *const src = &x->plane[0].src; - if (get_bitdepth_data_path_index(xd)) { - aom_highbd_subtract_block(bh, bw, residual1, bw, src->buf, src->stride, - CONVERT_TO_BYTEPTR(*preds1), bw, xd->bd); - aom_highbd_subtract_block(bh, bw, diff10, bw, CONVERT_TO_BYTEPTR(*preds1), - bw, CONVERT_TO_BYTEPTR(*preds0), bw, xd->bd); - } else { - aom_subtract_block(bh, bw, residual1, bw, src->buf, src->stride, *preds1, - bw); - aom_subtract_block(bh, bw, diff10, bw, *preds1, bw, 
*preds0, bw); - } -} - -static int64_t build_and_cost_compound_type( - const AV1_COMP *const cpi, MACROBLOCK *x, const int_mv *const cur_mv, - const BLOCK_SIZE bsize, const PREDICTION_MODE this_mode, int *rs2, - int rate_mv, BUFFER_SET *ctx, int *out_rate_mv, uint8_t **preds0, - uint8_t **preds1, int16_t *residual1, int16_t *diff10, int *strides, - int mi_row, int mi_col, int mode_rate, int64_t ref_best_rd, - int *calc_pred_masked_compound) { - const AV1_COMMON *const cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - int rate_sum; - int64_t dist_sum; - int64_t best_rd_cur = INT64_MAX; - int64_t rd = INT64_MAX; - int tmp_skip_txfm_sb; - int64_t tmp_skip_sse_sb; - const COMPOUND_TYPE compound_type = mbmi->interinter_comp.type; - - if (*calc_pred_masked_compound) { - get_inter_predictors_masked_compound(cpi, x, bsize, mi_row, mi_col, preds0, - preds1, residual1, diff10, strides); - *calc_pred_masked_compound = 0; - } - - best_rd_cur = - pick_interinter_mask(cpi, x, bsize, *preds0, *preds1, residual1, diff10); - *rs2 += get_interinter_compound_mask_rate(x, mbmi); - best_rd_cur += RDCOST(x->rdmult, *rs2 + rate_mv, 0); - - // Although the true rate_mv might be different after motion search, but it - // is unlikely to be the best mode considering the transform rd cost and other - // mode overhead cost - int64_t mode_rd = RDCOST(x->rdmult, *rs2 + mode_rate, 0); - if (mode_rd > ref_best_rd) return INT64_MAX; - - if (have_newmv_in_inter_mode(this_mode) && compound_type == COMPOUND_WEDGE) { - *out_rate_mv = interinter_compound_motion_search(cpi, x, cur_mv, bsize, - this_mode, mi_row, mi_col); - av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, ctx, bsize); - model_rd_sb_fn[MODELRD_TYPE_MASKED_COMPOUND]( - cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &rate_sum, &dist_sum, - &tmp_skip_txfm_sb, &tmp_skip_sse_sb, NULL, NULL, NULL); - rd = RDCOST(x->rdmult, *rs2 + *out_rate_mv + rate_sum, dist_sum); - if (rd >= best_rd_cur) { - 
mbmi->mv[0].as_int = cur_mv[0].as_int; - mbmi->mv[1].as_int = cur_mv[1].as_int; - *out_rate_mv = rate_mv; - av1_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0, preds0, strides, - preds1, strides); - } - rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum, - &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX); - if (rd != INT64_MAX) - rd = RDCOST(x->rdmult, *rs2 + *out_rate_mv + rate_sum, dist_sum); - best_rd_cur = rd; - - } else { - av1_build_wedge_inter_predictor_from_buf(xd, bsize, 0, 0, preds0, strides, - preds1, strides); - rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum, - &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX); - if (rd != INT64_MAX) - rd = RDCOST(x->rdmult, *rs2 + rate_mv + rate_sum, dist_sum); - best_rd_cur = rd; - } - return best_rd_cur; -} - -typedef struct { - // OBMC secondary prediction buffers and respective strides - uint8_t *above_pred_buf[MAX_MB_PLANE]; - int above_pred_stride[MAX_MB_PLANE]; - uint8_t *left_pred_buf[MAX_MB_PLANE]; - int left_pred_stride[MAX_MB_PLANE]; - int_mv (*single_newmv)[REF_FRAMES]; - // Pointer to array of motion vectors to use for each ref and their rates - // Should point to first of 2 arrays in 2D array - int (*single_newmv_rate)[REF_FRAMES]; - int (*single_newmv_valid)[REF_FRAMES]; - // Pointer to array of predicted rate-distortion - // Should point to first of 2 arrays in 2D array - int64_t (*modelled_rd)[MAX_REF_MV_SERCH][REF_FRAMES]; - InterpFilter single_filter[MB_MODE_COUNT][REF_FRAMES]; - int ref_frame_cost; - int single_comp_cost; - int64_t (*simple_rd)[MAX_REF_MV_SERCH][REF_FRAMES]; - int skip_motion_mode; - INTERINTRA_MODE *inter_intra_mode; -} HandleInterModeArgs; - -/* If the current mode shares the same mv with other modes with higher cost, - * skip this mode. 
*/ -static int skip_repeated_mv(const AV1_COMMON *const cm, - const MACROBLOCK *const x, - PREDICTION_MODE this_mode, - const MV_REFERENCE_FRAME ref_frames[2], - InterModeSearchState *search_state) { - const int is_comp_pred = ref_frames[1] > INTRA_FRAME; - const uint8_t ref_frame_type = av1_ref_frame_type(ref_frames); - const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; - const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type]; - PREDICTION_MODE compare_mode = MB_MODE_COUNT; - if (!is_comp_pred) { - if (this_mode == NEARMV) { - if (ref_mv_count == 0) { - // NEARMV has the same motion vector as NEARESTMV - compare_mode = NEARESTMV; - } - if (ref_mv_count == 1 && - cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) { - // NEARMV has the same motion vector as GLOBALMV - compare_mode = GLOBALMV; - } - } - if (this_mode == GLOBALMV) { - if (ref_mv_count == 0 && - cm->global_motion[ref_frames[0]].wmtype <= TRANSLATION) { - // GLOBALMV has the same motion vector as NEARESTMV - compare_mode = NEARESTMV; - } - if (ref_mv_count == 1) { - // GLOBALMV has the same motion vector as NEARMV - compare_mode = NEARMV; - } - } - - if (compare_mode != MB_MODE_COUNT) { - // Use modelled_rd to check whether compare mode was searched - if (search_state->modelled_rd[compare_mode][0][ref_frames[0]] != - INT64_MAX) { - const int16_t mode_ctx = - av1_mode_context_analyzer(mbmi_ext->mode_context, ref_frames); - const int compare_cost = cost_mv_ref(x, compare_mode, mode_ctx); - const int this_cost = cost_mv_ref(x, this_mode, mode_ctx); - - // Only skip if the mode cost is larger than compare mode cost - if (this_cost > compare_cost) { - search_state->modelled_rd[this_mode][0][ref_frames[0]] = - search_state->modelled_rd[compare_mode][0][ref_frames[0]]; - return 1; - } - } - } - } - return 0; -} - -static INLINE int clamp_and_check_mv(int_mv *out_mv, int_mv in_mv, - const AV1_COMMON *cm, - const MACROBLOCK *x) { - const MACROBLOCKD *const xd = &x->e_mbd; - *out_mv = in_mv; - 
lower_mv_precision(&out_mv->as_mv, cm->allow_high_precision_mv, - cm->cur_frame_force_integer_mv); - clamp_mv2(&out_mv->as_mv, xd); - return !mv_check_bounds(&x->mv_limits, &out_mv->as_mv); -} - -static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x, - const BLOCK_SIZE bsize, int_mv *cur_mv, - const int mi_row, const int mi_col, - int *const rate_mv, - HandleInterModeArgs *const args) { - const MACROBLOCKD *const xd = &x->e_mbd; - const MB_MODE_INFO *const mbmi = xd->mi[0]; - const int is_comp_pred = has_second_ref(mbmi); - const PREDICTION_MODE this_mode = mbmi->mode; - const int refs[2] = { mbmi->ref_frame[0], - mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1] }; - const int ref_mv_idx = mbmi->ref_mv_idx; - int i; - - (void)args; - - if (is_comp_pred) { - if (this_mode == NEW_NEWMV) { - cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int; - cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int; - - if (cpi->sf.comp_inter_joint_search_thresh <= bsize) { - joint_motion_search(cpi, x, bsize, cur_mv, mi_row, mi_col, NULL, NULL, - 0, rate_mv, 0); - } else { - *rate_mv = 0; - for (i = 0; i < 2; ++i) { - const int_mv ref_mv = av1_get_ref_mv(x, i); - av1_set_mvcost(x, i, mbmi->ref_mv_idx); - *rate_mv += - av1_mv_bit_cost(&cur_mv[i].as_mv, &ref_mv.as_mv, x->nmvjointcost, - x->mvcost, MV_COST_WEIGHT); - } - } - } else if (this_mode == NEAREST_NEWMV || this_mode == NEAR_NEWMV) { - cur_mv[1].as_int = args->single_newmv[ref_mv_idx][refs[1]].as_int; - if (cpi->sf.comp_inter_joint_search_thresh <= bsize) { - compound_single_motion_search_interinter( - cpi, x, bsize, cur_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 1); - } else { - av1_set_mvcost(x, 1, - mbmi->ref_mv_idx + (this_mode == NEAR_NEWMV ? 
1 : 0)); - const int_mv ref_mv = av1_get_ref_mv(x, 1); - *rate_mv = av1_mv_bit_cost(&cur_mv[1].as_mv, &ref_mv.as_mv, - x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); - } - } else { - assert(this_mode == NEW_NEARESTMV || this_mode == NEW_NEARMV); - cur_mv[0].as_int = args->single_newmv[ref_mv_idx][refs[0]].as_int; - if (cpi->sf.comp_inter_joint_search_thresh <= bsize) { - compound_single_motion_search_interinter( - cpi, x, bsize, cur_mv, mi_row, mi_col, NULL, 0, rate_mv, 0, 0); - } else { - const int_mv ref_mv = av1_get_ref_mv(x, 0); - av1_set_mvcost(x, 0, - mbmi->ref_mv_idx + (this_mode == NEW_NEARMV ? 1 : 0)); - *rate_mv = av1_mv_bit_cost(&cur_mv[0].as_mv, &ref_mv.as_mv, - x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); - } - } - } else { - single_motion_search(cpi, x, bsize, mi_row, mi_col, 0, rate_mv); - if (x->best_mv.as_int == INVALID_MV) return INT64_MAX; - - args->single_newmv[ref_mv_idx][refs[0]] = x->best_mv; - args->single_newmv_rate[ref_mv_idx][refs[0]] = *rate_mv; - args->single_newmv_valid[ref_mv_idx][refs[0]] = 1; - - cur_mv[0].as_int = x->best_mv.as_int; - -#if USE_DISCOUNT_NEWMV_TEST - // Estimate the rate implications of a new mv but discount this - // under certain circumstances where we want to help initiate a weak - // motion field, where the distortion gain for a single block may not - // be enough to overcome the cost of a new mv. 
- if (discount_newmv_test(cpi, x, this_mode, x->best_mv)) { - *rate_mv = AOMMAX(*rate_mv / NEW_MV_DISCOUNT_FACTOR, 1); - } -#endif - } - - return 0; -} - -static INLINE void swap_dst_buf(MACROBLOCKD *xd, const BUFFER_SET *dst_bufs[2], - int num_planes) { - const BUFFER_SET *buf0 = dst_bufs[0]; - dst_bufs[0] = dst_bufs[1]; - dst_bufs[1] = buf0; - restore_dst_buf(xd, *dst_bufs[0], num_planes); -} - -static INLINE int get_switchable_rate(MACROBLOCK *const x, - const InterpFilters filters, - const int ctx[2]) { - int inter_filter_cost; - const InterpFilter filter0 = av1_extract_interp_filter(filters, 0); - const InterpFilter filter1 = av1_extract_interp_filter(filters, 1); - inter_filter_cost = x->switchable_interp_costs[ctx[0]][filter0]; - inter_filter_cost += x->switchable_interp_costs[ctx[1]][filter1]; - return SWITCHABLE_INTERP_RATE_FACTOR * inter_filter_cost; -} - -// calculate the rdcost of given interpolation_filter -static INLINE int64_t interpolation_filter_rd( - MACROBLOCK *const x, const AV1_COMP *const cpi, BLOCK_SIZE bsize, - int mi_row, int mi_col, BUFFER_SET *const orig_dst, int64_t *const rd, - int *const switchable_rate, int *const skip_txfm_sb, - int64_t *const skip_sse_sb, const BUFFER_SET *dst_bufs[2], int filter_idx, - const int switchable_ctx[2], const int skip_pred, int *rate, - int64_t *dist) { - const AV1_COMMON *cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - int tmp_rate[2], tmp_skip_sb[2] = { 1, 1 }; - int64_t tmp_dist[2], tmp_skip_sse[2] = { 0, 0 }; - - const InterpFilters last_best = mbmi->interp_filters; - mbmi->interp_filters = filter_sets[filter_idx]; - const int tmp_rs = - get_switchable_rate(x, mbmi->interp_filters, switchable_ctx); - - assert(skip_pred != 2); - assert((skip_pred >= 0) && (skip_pred <= cpi->default_interp_skip_flags)); - assert(rate[0] >= 0); - assert(dist[0] >= 0); - assert((skip_txfm_sb[0] == 0) || (skip_txfm_sb[0] == 1)); 
- assert(skip_sse_sb[0] >= 0); - assert(rate[1] >= 0); - assert(dist[1] >= 0); - assert((skip_txfm_sb[1] == 0) || (skip_txfm_sb[1] == 1)); - assert(skip_sse_sb[1] >= 0); - - if (skip_pred != cpi->default_interp_skip_flags) { - if (skip_pred != DEFAULT_LUMA_INTERP_SKIP_FLAG) { - av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, orig_dst, bsize); -#if CONFIG_COLLECT_RD_STATS == 3 - RD_STATS rd_stats_y; - select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, mi_row, mi_col, INT64_MAX); - PrintPredictionUnitStats(cpi, x, &rd_stats_y, bsize); -#endif // CONFIG_COLLECT_RD_STATS == 3 - model_rd_sb_fn[MODELRD_TYPE_INTERP_FILTER]( - cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &tmp_rate[0], &tmp_dist[0], - &tmp_skip_sb[0], &tmp_skip_sse[0], NULL, NULL, NULL); - tmp_rate[1] = tmp_rate[0]; - tmp_dist[1] = tmp_dist[0]; - } else { - // only luma MC is skipped - tmp_rate[1] = rate[0]; - tmp_dist[1] = dist[0]; - } - if (num_planes > 1) { - for (int plane = 1; plane < num_planes; ++plane) { - int tmp_rate_uv, tmp_skip_sb_uv; - int64_t tmp_dist_uv, tmp_skip_sse_uv; - int64_t tmp_rd = RDCOST(x->rdmult, tmp_rs + tmp_rate[1], tmp_dist[1]); - if (tmp_rd >= *rd) { - mbmi->interp_filters = last_best; - return 0; - } - av1_build_inter_predictors_sbp(cm, xd, mi_row, mi_col, orig_dst, bsize, - plane); - model_rd_sb_fn[MODELRD_TYPE_INTERP_FILTER]( - cpi, bsize, x, xd, plane, plane, mi_row, mi_col, &tmp_rate_uv, - &tmp_dist_uv, &tmp_skip_sb_uv, &tmp_skip_sse_uv, NULL, NULL, NULL); - tmp_rate[1] = - (int)AOMMIN(((int64_t)tmp_rate[1] + (int64_t)tmp_rate_uv), INT_MAX); - tmp_dist[1] += tmp_dist_uv; - tmp_skip_sb[1] &= tmp_skip_sb_uv; - tmp_skip_sse[1] += tmp_skip_sse_uv; - } - } - } else { - // both luma and chroma MC is skipped - tmp_rate[1] = rate[1]; - tmp_dist[1] = dist[1]; - } - int64_t tmp_rd = RDCOST(x->rdmult, tmp_rs + tmp_rate[1], tmp_dist[1]); - - if (tmp_rd < *rd) { - *rd = tmp_rd; - *switchable_rate = tmp_rs; - if (skip_pred != cpi->default_interp_skip_flags) { - if (skip_pred == 0) { - // 
Overwrite the data as current filter is the best one - tmp_skip_sb[1] = tmp_skip_sb[0] & tmp_skip_sb[1]; - tmp_skip_sse[1] = tmp_skip_sse[0] + tmp_skip_sse[1]; - memcpy(rate, tmp_rate, sizeof(*rate) * 2); - memcpy(dist, tmp_dist, sizeof(*dist) * 2); - memcpy(skip_txfm_sb, tmp_skip_sb, sizeof(*skip_txfm_sb) * 2); - memcpy(skip_sse_sb, tmp_skip_sse, sizeof(*skip_sse_sb) * 2); - // As luma MC data is computed, no need to recompute after the search - x->recalc_luma_mc_data = 0; - } else if (skip_pred == DEFAULT_LUMA_INTERP_SKIP_FLAG) { - // As luma MC data is not computed, update of luma data can be skipped - rate[1] = tmp_rate[1]; - dist[1] = tmp_dist[1]; - skip_txfm_sb[1] = skip_txfm_sb[0] & tmp_skip_sb[1]; - skip_sse_sb[1] = skip_sse_sb[0] + tmp_skip_sse[1]; - // As luma MC data is not recomputed and current filter is the best, - // indicate the possibility of recomputing MC data - // If current buffer contains valid MC data, toggle to indicate that - // luma MC data needs to be recomputed - x->recalc_luma_mc_data ^= 1; - } - swap_dst_buf(xd, dst_bufs, num_planes); - } - return 1; - } - mbmi->interp_filters = last_best; - return 0; -} - -// Find the best rd filter in horizontal direction -static INLINE int find_best_horiz_interp_filter_rd( - MACROBLOCK *const x, const AV1_COMP *const cpi, BLOCK_SIZE bsize, - int mi_row, int mi_col, BUFFER_SET *const orig_dst, int64_t *const rd, - int *const switchable_rate, int *const skip_txfm_sb, - int64_t *const skip_sse_sb, const BUFFER_SET *dst_bufs[2], - const int switchable_ctx[2], const int skip_hor, int *rate, int64_t *dist, - int best_dual_mode) { - int i; - const int bw = block_size_wide[bsize]; - assert(best_dual_mode == 0); - if ((bw <= 4) && (skip_hor != cpi->default_interp_skip_flags)) { - int skip_pred = cpi->default_interp_skip_flags; - // Process the filters in reverse order to enable reusing rate and - // distortion (calcuated during EIGHTTAP_REGULAR) for MULTITAP_SHARP - for (i = (SWITCHABLE_FILTERS - 1); i >= 1; 
--i) { - if (interpolation_filter_rd(x, cpi, bsize, mi_row, mi_col, orig_dst, rd, - switchable_rate, skip_txfm_sb, skip_sse_sb, - dst_bufs, i, switchable_ctx, skip_pred, rate, - dist)) { - best_dual_mode = i; - } - skip_pred = skip_hor; - } - } else { - for (i = 1; i < SWITCHABLE_FILTERS; ++i) { - if (interpolation_filter_rd(x, cpi, bsize, mi_row, mi_col, orig_dst, rd, - switchable_rate, skip_txfm_sb, skip_sse_sb, - dst_bufs, i, switchable_ctx, skip_hor, rate, - dist)) { - best_dual_mode = i; - } - } - } - return best_dual_mode; -} - -// Find the best rd filter in vertical direction -static INLINE void find_best_vert_interp_filter_rd( - MACROBLOCK *const x, const AV1_COMP *const cpi, BLOCK_SIZE bsize, - int mi_row, int mi_col, BUFFER_SET *const orig_dst, int64_t *const rd, - int *const switchable_rate, int *const skip_txfm_sb, - int64_t *const skip_sse_sb, const BUFFER_SET *dst_bufs[2], - const int switchable_ctx[2], const int skip_ver, int *rate, int64_t *dist, - int best_dual_mode, int filter_set_size) { - int i; - const int bh = block_size_high[bsize]; - if ((bh <= 4) && (skip_ver != cpi->default_interp_skip_flags)) { - int skip_pred = cpi->default_interp_skip_flags; - // Process the filters in reverse order to enable reusing rate and - // distortion (calcuated during EIGHTTAP_REGULAR) for MULTITAP_SHARP - assert(filter_set_size == DUAL_FILTER_SET_SIZE); - for (i = (filter_set_size - SWITCHABLE_FILTERS + best_dual_mode); - i >= (best_dual_mode + SWITCHABLE_FILTERS); i -= SWITCHABLE_FILTERS) { - interpolation_filter_rd(x, cpi, bsize, mi_row, mi_col, orig_dst, rd, - switchable_rate, skip_txfm_sb, skip_sse_sb, - dst_bufs, i, switchable_ctx, skip_pred, rate, - dist); - skip_pred = skip_ver; - } - } else { - for (i = best_dual_mode + SWITCHABLE_FILTERS; i < filter_set_size; - i += SWITCHABLE_FILTERS) { - interpolation_filter_rd(x, cpi, bsize, mi_row, mi_col, orig_dst, rd, - switchable_rate, skip_txfm_sb, skip_sse_sb, - dst_bufs, i, switchable_ctx, skip_ver, rate, - 
dist); - } - } -} - -// check if there is saved result match with this search -static INLINE int is_interp_filter_match(const INTERPOLATION_FILTER_STATS *st, - MB_MODE_INFO *const mi) { - for (int i = 0; i < 2; ++i) { - if ((st->ref_frames[i] != mi->ref_frame[i]) || - (st->mv[i].as_int != mi->mv[i].as_int)) { - return 0; - } - } - if (has_second_ref(mi) && st->comp_type != mi->interinter_comp.type) return 0; - return 1; -} - -static INLINE int find_interp_filter_in_stats(MACROBLOCK *x, - MB_MODE_INFO *const mbmi) { - const int comp_idx = mbmi->compound_idx; - const int offset = x->interp_filter_stats_idx[comp_idx]; - for (int j = 0; j < offset; ++j) { - const INTERPOLATION_FILTER_STATS *st = &x->interp_filter_stats[comp_idx][j]; - if (is_interp_filter_match(st, mbmi)) { - mbmi->interp_filters = st->filters; - return j; - } - } - return -1; // no match result found -} - -static INLINE void save_interp_filter_search_stat(MACROBLOCK *x, - MB_MODE_INFO *const mbmi) { - const int comp_idx = mbmi->compound_idx; - const int offset = x->interp_filter_stats_idx[comp_idx]; - if (offset < MAX_INTERP_FILTER_STATS) { - INTERPOLATION_FILTER_STATS stat = { mbmi->interp_filters, - { mbmi->mv[0], mbmi->mv[1] }, - { mbmi->ref_frame[0], - mbmi->ref_frame[1] }, - mbmi->interinter_comp.type }; - x->interp_filter_stats[comp_idx][offset] = stat; - x->interp_filter_stats_idx[comp_idx]++; - } -} - -static int64_t interpolation_filter_search( - MACROBLOCK *const x, const AV1_COMP *const cpi, BLOCK_SIZE bsize, - int mi_row, int mi_col, const BUFFER_SET *const tmp_dst, - BUFFER_SET *const orig_dst, InterpFilter (*const single_filter)[REF_FRAMES], - int64_t *const rd, int *const switchable_rate, int *const skip_txfm_sb, - int64_t *const skip_sse_sb, const int skip_build_pred, - HandleInterModeArgs *args, int64_t ref_best_rd) { - const AV1_COMMON *cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - const 
int need_search = - av1_is_interp_needed(xd) && av1_is_interp_search_needed(xd); - int i; - // Index 0 corresponds to luma rd data and index 1 corresponds to cummulative - // data of all planes - int tmp_rate[2] = { 0, 0 }; - int64_t tmp_dist[2] = { 0, 0 }; - int best_skip_txfm_sb[2] = { 1, 1 }; - int64_t best_skip_sse_sb[2] = { 0, 0 }; - const int ref_frame = xd->mi[0]->ref_frame[0]; - - (void)single_filter; - int match_found = -1; - const InterpFilter assign_filter = cm->interp_filter; - if (cpi->sf.skip_repeat_interpolation_filter_search && need_search) { - match_found = find_interp_filter_in_stats(x, mbmi); - } - if (!need_search || match_found == -1) { - set_default_interp_filters(mbmi, assign_filter); - } - int switchable_ctx[2]; - switchable_ctx[0] = av1_get_pred_context_switchable_interp(xd, 0); - switchable_ctx[1] = av1_get_pred_context_switchable_interp(xd, 1); - *switchable_rate = - get_switchable_rate(x, mbmi->interp_filters, switchable_ctx); - if (!skip_build_pred) - av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize); - -#if CONFIG_COLLECT_RD_STATS == 3 - RD_STATS rd_stats_y; - select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, mi_row, mi_col, INT64_MAX); - PrintPredictionUnitStats(cpi, x, &rd_stats_y, bsize); -#endif // CONFIG_COLLECT_RD_STATS == 3 - model_rd_sb_fn[MODELRD_TYPE_INTERP_FILTER]( - cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &tmp_rate[0], &tmp_dist[0], - &best_skip_txfm_sb[0], &best_skip_sse_sb[0], NULL, NULL, NULL); - if (num_planes > 1) - model_rd_sb_fn[MODELRD_TYPE_INTERP_FILTER]( - cpi, bsize, x, xd, 1, num_planes - 1, mi_row, mi_col, &tmp_rate[1], - &tmp_dist[1], &best_skip_txfm_sb[1], &best_skip_sse_sb[1], NULL, NULL, - NULL); - tmp_rate[1] = - (int)AOMMIN((int64_t)tmp_rate[0] + (int64_t)tmp_rate[1], INT_MAX); - assert(tmp_rate[1] >= 0); - tmp_dist[1] = tmp_dist[0] + tmp_dist[1]; - best_skip_txfm_sb[1] = best_skip_txfm_sb[0] & best_skip_txfm_sb[1]; - best_skip_sse_sb[1] = best_skip_sse_sb[0] + best_skip_sse_sb[1]; - 
*rd = RDCOST(x->rdmult, (*switchable_rate + tmp_rate[1]), tmp_dist[1]); - *skip_txfm_sb = best_skip_txfm_sb[1]; - *skip_sse_sb = best_skip_sse_sb[1]; - x->pred_sse[ref_frame] = (unsigned int)(best_skip_sse_sb[0] >> 4); - - if (assign_filter != SWITCHABLE || match_found != -1) { - return 0; - } - if (!need_search) { - assert(mbmi->interp_filters == - av1_broadcast_interp_filter(EIGHTTAP_REGULAR)); - return 0; - } - if (args->modelled_rd != NULL) { - if (has_second_ref(mbmi)) { - const int ref_mv_idx = mbmi->ref_mv_idx; - int refs[2] = { mbmi->ref_frame[0], - (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) }; - const int mode0 = compound_ref0_mode(mbmi->mode); - const int mode1 = compound_ref1_mode(mbmi->mode); - const int64_t mrd = AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]], - args->modelled_rd[mode1][ref_mv_idx][refs[1]]); - if ((*rd >> 1) > mrd && ref_best_rd < INT64_MAX) { - return INT64_MAX; - } - } - } - - x->recalc_luma_mc_data = 0; - // skip_flag=xx (in binary form) - // Setting 0th flag corresonds to skipping luma MC and setting 1st bt - // corresponds to skipping chroma MC skip_flag=0 corresponds to "Don't skip - // luma and chroma MC" Skip flag=1 corresponds to "Skip Luma MC only" - // Skip_flag=2 is not a valid case - // skip_flag=3 corresponds to "Skip both luma and chroma MC" - int skip_hor = cpi->default_interp_skip_flags; - int skip_ver = cpi->default_interp_skip_flags; - const int is_compound = has_second_ref(mbmi); - assert(is_intrabc_block(mbmi) == 0); - for (int j = 0; j < 1 + is_compound; ++j) { - const RefBuffer *ref_buf = &cm->frame_refs[mbmi->ref_frame[j] - LAST_FRAME]; - const struct scale_factors *const sf = &ref_buf->sf; - // TODO(any): Refine skip flag calculation considering scaling - if (av1_is_scaled(sf)) { - skip_hor = 0; - skip_ver = 0; - break; - } - const MV mv = mbmi->mv[j].as_mv; - int skip_hor_plane = 0; - int skip_ver_plane = 0; - for (int k = 0; k < AOMMAX(1, (num_planes - 1)); ++k) { - struct macroblockd_plane 
*const pd = &xd->plane[k]; - const int bw = pd->width; - const int bh = pd->height; - const MV mv_q4 = clamp_mv_to_umv_border_sb( - xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y); - const int sub_x = (mv_q4.col & SUBPEL_MASK) << SCALE_EXTRA_BITS; - const int sub_y = (mv_q4.row & SUBPEL_MASK) << SCALE_EXTRA_BITS; - skip_hor_plane |= ((sub_x == 0) << k); - skip_ver_plane |= ((sub_y == 0) << k); - } - skip_hor = skip_hor & skip_hor_plane; - skip_ver = skip_ver & skip_ver_plane; - // It is not valid that "luma MV is sub-pel, whereas chroma MV is not" - assert(skip_hor != 2); - assert(skip_ver != 2); - } - // When compond prediction type is compound segment wedge, luma MC and chroma - // MC need to go hand in hand as mask generated during luma MC is reuired for - // chroma MC. If skip_hor = 0 and skip_ver = 1, mask used for chroma MC during - // vertical filter decision may be incorrect as temporary MC evaluation - // overwrites the mask. Make skip_ver as 0 for this case so that mask is - // populated during luma MC - if (is_compound && mbmi->compound_idx == 1 && - mbmi->interinter_comp.type == COMPOUND_DIFFWTD) { - assert(mbmi->comp_group_idx == 1); - if (skip_hor == 0 && skip_ver == 1) skip_ver = 0; - } - // do interp_filter search - const int filter_set_size = DUAL_FILTER_SET_SIZE; - restore_dst_buf(xd, *tmp_dst, num_planes); - const BUFFER_SET *dst_bufs[2] = { tmp_dst, orig_dst }; - if (cpi->sf.use_fast_interpolation_filter_search && - cm->seq_params.enable_dual_filter) { - // default to (R,R): EIGHTTAP_REGULARxEIGHTTAP_REGULAR - int best_dual_mode = 0; - // Find best of {R}x{R,Sm,Sh} - // EIGHTTAP_REGULAR mode is calculated beforehand - best_dual_mode = find_best_horiz_interp_filter_rd( - x, cpi, bsize, mi_row, mi_col, orig_dst, rd, switchable_rate, - best_skip_txfm_sb, best_skip_sse_sb, dst_bufs, switchable_ctx, skip_hor, - tmp_rate, tmp_dist, best_dual_mode); - - // From best of horizontal EIGHTTAP_REGULAR modes, check vertical modes - 
find_best_vert_interp_filter_rd( - x, cpi, bsize, mi_row, mi_col, orig_dst, rd, switchable_rate, - best_skip_txfm_sb, best_skip_sse_sb, dst_bufs, switchable_ctx, skip_ver, - tmp_rate, tmp_dist, best_dual_mode, filter_set_size); - } else { - // EIGHTTAP_REGULAR mode is calculated beforehand - for (i = 1; i < filter_set_size; ++i) { - if (cm->seq_params.enable_dual_filter == 0) { - const int16_t filter_y = filter_sets[i] & 0xffff; - const int16_t filter_x = filter_sets[i] >> 16; - if (filter_x != filter_y) continue; - } - interpolation_filter_rd(x, cpi, bsize, mi_row, mi_col, orig_dst, rd, - switchable_rate, best_skip_txfm_sb, - best_skip_sse_sb, dst_bufs, i, switchable_ctx, 0, - tmp_rate, tmp_dist); - assert(x->recalc_luma_mc_data == 0); - } - } - swap_dst_buf(xd, dst_bufs, num_planes); - // Recompute final MC data if required - if (x->recalc_luma_mc_data == 1) { - // Recomputing final luma MC data is required only if the same was skipped - // in either of the directions Condition below is necessary, but not - // sufficient - assert((skip_hor == 1) || (skip_ver == 1)); - av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, orig_dst, bsize); - } - *skip_txfm_sb = best_skip_txfm_sb[1]; - *skip_sse_sb = best_skip_sse_sb[1]; - x->pred_sse[ref_frame] = (unsigned int)(best_skip_sse_sb[0] >> 4); - - // save search results - if (cpi->sf.skip_repeat_interpolation_filter_search) { - assert(match_found == -1); - save_interp_filter_search_stat(x, mbmi); - } - return 0; -} - -static int txfm_search(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, - int mi_row, int mi_col, RD_STATS *rd_stats, - RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv, - int mode_rate, int64_t ref_best_rd) { - /* - * This function combines y and uv planes' transform search processes - * together, when the prediction is generated. It first does subtration to - * obtain the prediction error. 
Then it calls - * select_tx_type_yrd/super_block_yrd and inter_block_uvrd sequentially and - * handles the early terminations happen in those functions. At the end, it - * computes the rd_stats/_y/_uv accordingly. - */ - const AV1_COMMON *cm = &cpi->common; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - int skip_txfm_sb = 0; - const int num_planes = av1_num_planes(cm); - const int ref_frame_1 = mbmi->ref_frame[1]; - const int64_t mode_rd = RDCOST(x->rdmult, mode_rate, 0); - const int64_t rd_thresh = - ref_best_rd == INT64_MAX ? INT64_MAX : ref_best_rd - mode_rd; - const int skip_ctx = av1_get_skip_context(xd); - const int64_t min_header_rate = - mode_rate + AOMMIN(x->skip_cost[skip_ctx][0], x->skip_cost[skip_ctx][1]); - // Account for minimum skip and non_skip rd. - // Eventually either one of them will be added to mode_rate - const int64_t min_header_rd_possible = RDCOST(x->rdmult, min_header_rate, 0); - - if (min_header_rd_possible > ref_best_rd) { - av1_invalid_rd_stats(rd_stats_y); - av1_invalid_rd_stats(rd_stats); - return 0; - } - - av1_init_rd_stats(rd_stats); - av1_init_rd_stats(rd_stats_y); - av1_init_rd_stats(rd_stats_uv); - rd_stats->rate = mode_rate; - - if (!cpi->common.all_lossless) - check_block_skip(cpi, bsize, x, xd, 0, num_planes - 1, &skip_txfm_sb); - if (!skip_txfm_sb) { - int64_t non_skip_rdcosty = INT64_MAX; - int64_t skip_rdcosty = INT64_MAX; - int64_t min_rdcosty = INT64_MAX; - int is_cost_valid_uv = 0; - - // cost and distortion - av1_subtract_plane(x, bsize, 0); - if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) { - // Motion mode - select_tx_type_yrd(cpi, x, rd_stats_y, bsize, mi_row, mi_col, rd_thresh); -#if CONFIG_COLLECT_RD_STATS == 2 - PrintPredictionUnitStats(cpi, x, rd_stats_y, bsize); -#endif // CONFIG_COLLECT_RD_STATS == 2 - } else { - super_block_yrd(cpi, x, rd_stats_y, bsize, rd_thresh); - memset(mbmi->inter_tx_size, mbmi->tx_size, sizeof(mbmi->inter_tx_size)); - for (int i = 
0; i < xd->n4_h * xd->n4_w; ++i) - set_blk_skip(x, 0, i, rd_stats_y->skip); - } - - if (rd_stats_y->rate == INT_MAX) { - av1_invalid_rd_stats(rd_stats); - // TODO(angiebird): check if we need this - // restore_dst_buf(xd, *orig_dst, num_planes); - mbmi->ref_frame[1] = ref_frame_1; - return 0; - } - - av1_merge_rd_stats(rd_stats, rd_stats_y); - - non_skip_rdcosty = RDCOST( - x->rdmult, rd_stats->rate + x->skip_cost[skip_ctx][0], rd_stats->dist); - skip_rdcosty = - RDCOST(x->rdmult, mode_rate + x->skip_cost[skip_ctx][1], rd_stats->sse); - min_rdcosty = AOMMIN(non_skip_rdcosty, skip_rdcosty); - - if (min_rdcosty > ref_best_rd) { - int64_t tokenonly_rdy = - AOMMIN(RDCOST(x->rdmult, rd_stats_y->rate, rd_stats_y->dist), - RDCOST(x->rdmult, 0, rd_stats_y->sse)); - // Invalidate rd_stats_y to skip the rest of the motion modes search - if (tokenonly_rdy - (tokenonly_rdy >> cpi->sf.adaptive_txb_search_level) > - rd_thresh) - av1_invalid_rd_stats(rd_stats_y); - mbmi->ref_frame[1] = ref_frame_1; - return 0; - } - - if (num_planes > 1) { - /* clang-format off */ - is_cost_valid_uv = - inter_block_uvrd(cpi, x, rd_stats_uv, bsize, - ref_best_rd - non_skip_rdcosty, - ref_best_rd - skip_rdcosty, FTXS_NONE); - if (!is_cost_valid_uv) { - mbmi->ref_frame[1] = ref_frame_1; - return 0; - } - /* clang-format on */ - av1_merge_rd_stats(rd_stats, rd_stats_uv); - } else { - av1_init_rd_stats(rd_stats_uv); - } - if (rd_stats->skip) { - rd_stats->rate -= rd_stats_uv->rate + rd_stats_y->rate; - rd_stats_y->rate = 0; - rd_stats_uv->rate = 0; - rd_stats->rate += x->skip_cost[skip_ctx][1]; - mbmi->skip = 0; - // here mbmi->skip temporarily plays a role as what this_skip2 does - - int64_t tmprd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); - if (tmprd > ref_best_rd) { - mbmi->ref_frame[1] = ref_frame_1; - return 0; - } - } else if (!xd->lossless[mbmi->segment_id] && - (RDCOST(x->rdmult, - rd_stats_y->rate + rd_stats_uv->rate + - x->skip_cost[skip_ctx][0], - rd_stats->dist) >= - 
RDCOST(x->rdmult, x->skip_cost[skip_ctx][1], rd_stats->sse))) { - rd_stats->rate -= rd_stats_uv->rate + rd_stats_y->rate; - rd_stats->rate += x->skip_cost[skip_ctx][1]; - rd_stats->dist = rd_stats->sse; - rd_stats_y->rate = 0; - rd_stats_uv->rate = 0; - mbmi->skip = 1; - } else { - rd_stats->rate += x->skip_cost[skip_ctx][0]; - mbmi->skip = 0; - } - } else { - x->skip = 1; - mbmi->tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode); - // The cost of skip bit needs to be added. - mbmi->skip = 0; - rd_stats->rate += x->skip_cost[skip_ctx][1]; - - rd_stats->dist = 0; - rd_stats->sse = 0; - rd_stats_y->rate = 0; - rd_stats_uv->rate = 0; - rd_stats->skip = 1; - int64_t tmprd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); - if (tmprd > ref_best_rd) { - mbmi->ref_frame[1] = ref_frame_1; - return 0; - } - } - return 1; -} - -static int handle_inter_intra_mode(const AV1_COMP *const cpi, - MACROBLOCK *const x, BLOCK_SIZE bsize, - int mi_row, int mi_col, MB_MODE_INFO *mbmi, - HandleInterModeArgs *args, - int64_t ref_best_rd, int *rate_mv, - int *tmp_rate2, BUFFER_SET *orig_dst) { - const AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - MACROBLOCKD *xd = &x->e_mbd; - - INTERINTRA_MODE best_interintra_mode = II_DC_PRED; - int64_t rd, best_interintra_rd = INT64_MAX; - int rmode, rate_sum; - int64_t dist_sum; - int tmp_rate_mv = 0; - int tmp_skip_txfm_sb; - int bw = block_size_wide[bsize]; - int64_t tmp_skip_sse_sb; - DECLARE_ALIGNED(16, uint8_t, tmp_buf_[2 * MAX_INTERINTRA_SB_SQUARE]); - DECLARE_ALIGNED(16, uint8_t, intrapred_[2 * MAX_INTERINTRA_SB_SQUARE]); - uint8_t *tmp_buf = get_buf_by_bd(xd, tmp_buf_); - uint8_t *intrapred = get_buf_by_bd(xd, intrapred_); - const int *const interintra_mode_cost = - x->interintra_mode_cost[size_group_lookup[bsize]]; - const int_mv mv0 = mbmi->mv[0]; - const int is_wedge_used = is_interintra_wedge_used(bsize); - int rwedge = is_wedge_used ? 
x->wedge_interintra_cost[bsize][0] : 0; - mbmi->ref_frame[1] = NONE_FRAME; - xd->plane[0].dst.buf = tmp_buf; - xd->plane[0].dst.stride = bw; - av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, NULL, bsize); - - restore_dst_buf(xd, *orig_dst, num_planes); - mbmi->ref_frame[1] = INTRA_FRAME; - mbmi->use_wedge_interintra = 0; - best_interintra_mode = args->inter_intra_mode[mbmi->ref_frame[0]]; - int j = 0; - if (cpi->sf.reuse_inter_intra_mode == 0 || - best_interintra_mode == INTERINTRA_MODES) { - for (j = 0; j < INTERINTRA_MODES; ++j) { - mbmi->interintra_mode = (INTERINTRA_MODE)j; - rmode = interintra_mode_cost[mbmi->interintra_mode]; - av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, orig_dst, - intrapred, bw); - av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw); - model_rd_sb_fn[MODELRD_TYPE_INTERINTRA]( - cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &rate_sum, &dist_sum, - &tmp_skip_txfm_sb, &tmp_skip_sse_sb, NULL, NULL, NULL); - rd = RDCOST(x->rdmult, tmp_rate_mv + rate_sum + rmode, dist_sum); - if (rd < best_interintra_rd) { - best_interintra_rd = rd; - best_interintra_mode = mbmi->interintra_mode; - } - } - args->inter_intra_mode[mbmi->ref_frame[0]] = best_interintra_mode; - } - if (j == 0 || best_interintra_mode != II_SMOOTH_PRED) { - mbmi->interintra_mode = best_interintra_mode; - rmode = interintra_mode_cost[mbmi->interintra_mode]; - av1_build_intra_predictors_for_interintra(cm, xd, bsize, 0, orig_dst, - intrapred, bw); - av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw); - } - rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum, - &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX); - if (rd != INT64_MAX) - rd = RDCOST(x->rdmult, *rate_mv + rmode + rate_sum + rwedge, dist_sum); - best_interintra_rd = rd; - if (ref_best_rd < INT64_MAX && (best_interintra_rd >> 1) > ref_best_rd) { - return -1; - } - if (is_wedge_used) { - int64_t best_interintra_rd_nowedge = rd; - int64_t best_interintra_rd_wedge = INT64_MAX; - 
int_mv tmp_mv; - // Disable wedge search if source variance is small - if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh) { - mbmi->use_wedge_interintra = 1; - - rwedge = av1_cost_literal(get_interintra_wedge_bits(bsize)) + - x->wedge_interintra_cost[bsize][1]; - - best_interintra_rd_wedge = - pick_interintra_wedge(cpi, x, bsize, intrapred_, tmp_buf_); - - best_interintra_rd_wedge += - RDCOST(x->rdmult, rmode + *rate_mv + rwedge, 0); - rd = INT64_MAX; - // Refine motion vector. - if (have_newmv_in_inter_mode(mbmi->mode)) { - // get negative of mask - const uint8_t *mask = av1_get_contiguous_soft_mask( - mbmi->interintra_wedge_index, 1, bsize); - tmp_mv = mbmi->mv[0]; - compound_single_motion_search(cpi, x, bsize, &tmp_mv.as_mv, mi_row, - mi_col, intrapred, mask, bw, &tmp_rate_mv, - 0); - if (mbmi->mv[0].as_int != tmp_mv.as_int) { - mbmi->mv[0].as_int = tmp_mv.as_int; - av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, orig_dst, - bsize); - model_rd_sb_fn[MODELRD_TYPE_MASKED_COMPOUND]( - cpi, bsize, x, xd, 0, 0, mi_row, mi_col, &rate_sum, &dist_sum, - &tmp_skip_txfm_sb, &tmp_skip_sse_sb, NULL, NULL, NULL); - rd = RDCOST(x->rdmult, tmp_rate_mv + rmode + rate_sum + rwedge, - dist_sum); - } - } - if (rd >= best_interintra_rd_wedge) { - tmp_mv.as_int = mv0.as_int; - tmp_rate_mv = *rate_mv; - av1_combine_interintra(xd, bsize, 0, tmp_buf, bw, intrapred, bw); - } - // Evaluate closer to true rd - rd = estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum, - &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX); - if (rd != INT64_MAX) - rd = RDCOST(x->rdmult, rmode + tmp_rate_mv + rwedge + rate_sum, - dist_sum); - best_interintra_rd_wedge = rd; - if (best_interintra_rd_wedge < best_interintra_rd_nowedge) { - mbmi->use_wedge_interintra = 1; - mbmi->mv[0].as_int = tmp_mv.as_int; - *tmp_rate2 += tmp_rate_mv - *rate_mv; - *rate_mv = tmp_rate_mv; - } else { - mbmi->use_wedge_interintra = 0; - mbmi->mv[0].as_int = mv0.as_int; - av1_build_inter_predictors_sby(cm, 
xd, mi_row, mi_col, orig_dst, bsize); - } - } else { - mbmi->use_wedge_interintra = 0; - } - } // if (is_interintra_wedge_used(bsize)) - if (num_planes > 1) { - av1_build_inter_predictors_sbuv(cm, xd, mi_row, mi_col, orig_dst, bsize); - } - return 0; -} - -// TODO(afergs): Refactor the MBMI references in here - there's four -// TODO(afergs): Refactor optional args - add them to a struct or remove -static int64_t motion_mode_rd(const AV1_COMP *const cpi, MACROBLOCK *const x, - BLOCK_SIZE bsize, RD_STATS *rd_stats, - RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv, - int *disable_skip, int mi_row, int mi_col, - HandleInterModeArgs *const args, - int64_t ref_best_rd, const int *refs, - int *rate_mv, BUFFER_SET *orig_dst -#if CONFIG_COLLECT_INTER_MODE_RD_STATS - , - TileDataEnc *tile_data, int64_t *best_est_rd, - int do_tx_search, InterModesInfo *inter_modes_info -#endif -) { - const AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = xd->mi[0]; - const int is_comp_pred = has_second_ref(mbmi); - const PREDICTION_MODE this_mode = mbmi->mode; - const int rate2_nocoeff = rd_stats->rate; - int best_xskip, best_disable_skip = 0; - RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv; - MB_MODE_INFO base_mbmi, best_mbmi; - uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE]; - const int rate_mv0 = *rate_mv; - - int interintra_allowed = cm->seq_params.enable_interintra_compound && - is_interintra_allowed(mbmi) && mbmi->compound_idx; - int pts0[SAMPLES_ARRAY_SIZE], pts_inref0[SAMPLES_ARRAY_SIZE]; - - assert(mbmi->ref_frame[1] != INTRA_FRAME); - const MV_REFERENCE_FRAME ref_frame_1 = mbmi->ref_frame[1]; - av1_invalid_rd_stats(&best_rd_stats); - aom_clear_system_state(); - mbmi->num_proj_ref = 1; // assume num_proj_ref >=1 - MOTION_MODE last_motion_mode_allowed = SIMPLE_TRANSLATION; - if (cm->switchable_motion_mode) { - last_motion_mode_allowed = motion_mode_allowed(xd->global_motion, xd, mbmi, - 
cm->allow_warped_motion); - } - if (last_motion_mode_allowed == WARPED_CAUSAL) { - mbmi->num_proj_ref = findSamples(cm, xd, mi_row, mi_col, pts0, pts_inref0); - } - int total_samples = mbmi->num_proj_ref; - if (total_samples == 0) { - last_motion_mode_allowed = OBMC_CAUSAL; - } - base_mbmi = *mbmi; - - const int switchable_rate = - av1_is_interp_needed(xd) ? av1_get_switchable_rate(cm, x, xd) : 0; - int64_t best_rd = INT64_MAX; - int best_rate_mv = rate_mv0; - for (int mode_index = (int)SIMPLE_TRANSLATION; - mode_index <= (int)last_motion_mode_allowed + interintra_allowed; - mode_index++) { - if (args->skip_motion_mode && mode_index) continue; - int64_t tmp_rd = INT64_MAX; - int tmp_rate2 = rate2_nocoeff; - int is_interintra_mode = mode_index > (int)last_motion_mode_allowed; - int skip_txfm_sb = 0; - int tmp_rate_mv = rate_mv0; - - *mbmi = base_mbmi; - if (is_interintra_mode) { - mbmi->motion_mode = SIMPLE_TRANSLATION; - } else { - mbmi->motion_mode = (MOTION_MODE)mode_index; - assert(mbmi->ref_frame[1] != INTRA_FRAME); - } - - if (mbmi->motion_mode == SIMPLE_TRANSLATION && !is_interintra_mode) { - // SIMPLE_TRANSLATION mode: no need to recalculate. 
- // The prediction is calculated before motion_mode_rd() is called in - // handle_inter_mode() - } else if (mbmi->motion_mode == OBMC_CAUSAL) { - uint32_t cur_mv = mbmi->mv[0].as_int; - assert(!is_comp_pred); - if (have_newmv_in_inter_mode(this_mode)) { - single_motion_search(cpi, x, bsize, mi_row, mi_col, 0, &tmp_rate_mv); - mbmi->mv[0].as_int = x->best_mv.as_int; -#if USE_DISCOUNT_NEWMV_TEST - if (discount_newmv_test(cpi, x, this_mode, mbmi->mv[0])) { - tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1); - } -#endif - tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv; - } - if (mbmi->mv[0].as_int != cur_mv) { - av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize); - } - av1_build_obmc_inter_prediction( - cm, xd, mi_row, mi_col, args->above_pred_buf, args->above_pred_stride, - args->left_pred_buf, args->left_pred_stride); - } else if (mbmi->motion_mode == WARPED_CAUSAL) { - int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE]; - mbmi->motion_mode = WARPED_CAUSAL; - mbmi->wm_params.wmtype = DEFAULT_WMTYPE; - mbmi->interp_filters = av1_broadcast_interp_filter( - av1_unswitchable_filter(cm->interp_filter)); - - memcpy(pts, pts0, total_samples * 2 * sizeof(*pts0)); - memcpy(pts_inref, pts_inref0, total_samples * 2 * sizeof(*pts_inref0)); - // Select the samples according to motion vector difference - if (mbmi->num_proj_ref > 1) { - mbmi->num_proj_ref = selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref, - mbmi->num_proj_ref, bsize); - } - - if (!find_projection(mbmi->num_proj_ref, pts, pts_inref, bsize, - mbmi->mv[0].as_mv.row, mbmi->mv[0].as_mv.col, - &mbmi->wm_params, mi_row, mi_col)) { - // Refine MV for NEWMV mode - assert(!is_comp_pred); - if (have_newmv_in_inter_mode(this_mode)) { - const int_mv mv0 = mbmi->mv[0]; - const WarpedMotionParams wm_params0 = mbmi->wm_params; - int num_proj_ref0 = mbmi->num_proj_ref; - - // Refine MV in a small range. 
- av1_refine_warped_mv(cpi, x, bsize, mi_row, mi_col, pts0, pts_inref0, - total_samples); - - // Keep the refined MV and WM parameters. - if (mv0.as_int != mbmi->mv[0].as_int) { - const int ref = refs[0]; - const int_mv ref_mv = av1_get_ref_mv(x, 0); - tmp_rate_mv = - av1_mv_bit_cost(&mbmi->mv[0].as_mv, &ref_mv.as_mv, - x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); - - if (cpi->sf.adaptive_motion_search) - x->pred_mv[ref] = mbmi->mv[0].as_mv; - -#if USE_DISCOUNT_NEWMV_TEST - if (discount_newmv_test(cpi, x, this_mode, mbmi->mv[0])) { - tmp_rate_mv = AOMMAX((tmp_rate_mv / NEW_MV_DISCOUNT_FACTOR), 1); - } -#endif - tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv; - } else { - // Restore the old MV and WM parameters. - mbmi->mv[0] = mv0; - mbmi->wm_params = wm_params0; - mbmi->num_proj_ref = num_proj_ref0; - } - } - - av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize); - } else { - continue; - } - } else if (is_interintra_mode) { - const int ret = handle_inter_intra_mode( - cpi, x, bsize, mi_row, mi_col, mbmi, args, ref_best_rd, &tmp_rate_mv, - &tmp_rate2, orig_dst); - if (ret < 0) continue; - } - - if (!cpi->common.all_lossless) - check_block_skip(cpi, bsize, x, xd, 0, num_planes - 1, &skip_txfm_sb); - - x->skip = 0; - - rd_stats->dist = 0; - rd_stats->sse = 0; - rd_stats->skip = 1; - rd_stats->rate = tmp_rate2; - if (mbmi->motion_mode != WARPED_CAUSAL) rd_stats->rate += switchable_rate; - if (interintra_allowed) { - rd_stats->rate += x->interintra_cost[size_group_lookup[bsize]] - [mbmi->ref_frame[1] == INTRA_FRAME]; - if (mbmi->ref_frame[1] == INTRA_FRAME) { - rd_stats->rate += x->interintra_mode_cost[size_group_lookup[bsize]] - [mbmi->interintra_mode]; - if (is_interintra_wedge_used(bsize)) { - rd_stats->rate += - x->wedge_interintra_cost[bsize][mbmi->use_wedge_interintra]; - if (mbmi->use_wedge_interintra) { - rd_stats->rate += - av1_cost_literal(get_interintra_wedge_bits(bsize)); - } - } - } - } - if ((last_motion_mode_allowed > 
SIMPLE_TRANSLATION) && - (mbmi->ref_frame[1] != INTRA_FRAME)) { - if (last_motion_mode_allowed == WARPED_CAUSAL) { - rd_stats->rate += x->motion_mode_cost[bsize][mbmi->motion_mode]; - } else { - rd_stats->rate += x->motion_mode_cost1[bsize][mbmi->motion_mode]; - } - } - - if (!skip_txfm_sb) { -#if CONFIG_COLLECT_INTER_MODE_RD_STATS - int64_t est_rd = 0; - int est_skip = 0; - if (cpi->sf.inter_mode_rd_model_estimation && cm->tile_cols == 1 && - cm->tile_rows == 1) { - InterModeRdModel *md = &tile_data->inter_mode_rd_models[mbmi->sb_type]; - if (md->ready) { - const int64_t curr_sse = get_sse(cpi, x); - est_rd = get_est_rd(tile_data, mbmi->sb_type, x->rdmult, curr_sse, - rd_stats->rate); - est_skip = est_rd * 0.8 > *best_est_rd; - if (est_skip) { - mbmi->ref_frame[1] = ref_frame_1; - continue; - } else { - if (est_rd < *best_est_rd) { - *best_est_rd = est_rd; - } - } - } - } -#endif // CONFIG_COLLECT_INTER_MODE_RD_STATS - } - -#if CONFIG_COLLECT_INTER_MODE_RD_STATS - if (!do_tx_search) { - const int64_t curr_sse = get_sse(cpi, x); - int est_residue_cost = 0; - int64_t est_dist = 0; - const int has_est_rd = get_est_rate_dist(tile_data, bsize, curr_sse, - &est_residue_cost, &est_dist); - (void)has_est_rd; - assert(has_est_rd); - const int mode_rate = rd_stats->rate; - rd_stats->rate += est_residue_cost; - rd_stats->dist = est_dist; - rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); - if (cm->reference_mode == SINGLE_REFERENCE) { - if (!is_comp_pred) { - inter_modes_info_push(inter_modes_info, mode_rate, curr_sse, - rd_stats->rdcost, mbmi); - } - } else { - inter_modes_info_push(inter_modes_info, mode_rate, curr_sse, - rd_stats->rdcost, mbmi); - } - } else { -#endif - int mode_rate = rd_stats->rate; - if (!txfm_search(cpi, x, bsize, mi_row, mi_col, rd_stats, rd_stats_y, - rd_stats_uv, mode_rate, ref_best_rd)) { - if (rd_stats_y->rate == INT_MAX && mode_index == 0) { - return INT64_MAX; - } - continue; - } - if (!skip_txfm_sb) { - const int64_t 
curr_rd = - RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); - if (curr_rd < ref_best_rd) { - ref_best_rd = curr_rd; - } - *disable_skip = 0; -#if CONFIG_COLLECT_INTER_MODE_RD_STATS - if (cpi->sf.inter_mode_rd_model_estimation) { - const int skip_ctx = av1_get_skip_context(xd); - inter_mode_data_push(tile_data, mbmi->sb_type, rd_stats->sse, - rd_stats->dist, - rd_stats_y->rate + rd_stats_uv->rate + - x->skip_cost[skip_ctx][mbmi->skip]); - } -#endif // CONFIG_COLLECT_INTER_MODE_RD_STATS - } else { - *disable_skip = 1; - } -#if CONFIG_COLLECT_INTER_MODE_RD_STATS - } -#endif - - if (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV) { - if (is_nontrans_global_motion(xd, xd->mi[0])) { - mbmi->interp_filters = av1_broadcast_interp_filter( - av1_unswitchable_filter(cm->interp_filter)); - } - } - - tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); - if (mode_index == 0) - args->simple_rd[this_mode][mbmi->ref_mv_idx][mbmi->ref_frame[0]] = tmp_rd; - if ((mode_index == 0) || (tmp_rd < best_rd)) { - best_mbmi = *mbmi; - best_rd = tmp_rd; - best_rd_stats = *rd_stats; - best_rd_stats_y = *rd_stats_y; - best_rate_mv = tmp_rate_mv; - if (num_planes > 1) best_rd_stats_uv = *rd_stats_uv; - memcpy(best_blk_skip, x->blk_skip, - sizeof(x->blk_skip[0]) * xd->n4_h * xd->n4_w); - best_xskip = x->skip; - best_disable_skip = *disable_skip; - if (best_xskip) break; - } - } - mbmi->ref_frame[1] = ref_frame_1; - *rate_mv = best_rate_mv; - if (best_rd == INT64_MAX) { - av1_invalid_rd_stats(rd_stats); - restore_dst_buf(xd, *orig_dst, num_planes); - return INT64_MAX; - } - *mbmi = best_mbmi; - *rd_stats = best_rd_stats; - *rd_stats_y = best_rd_stats_y; - if (num_planes > 1) *rd_stats_uv = best_rd_stats_uv; - memcpy(x->blk_skip, best_blk_skip, - sizeof(x->blk_skip[0]) * xd->n4_h * xd->n4_w); - x->skip = best_xskip; - *disable_skip = best_disable_skip; - - restore_dst_buf(xd, *orig_dst, num_planes); - return 0; -} - -static int64_t skip_mode_rd(RD_STATS *rd_stats, const AV1_COMP 
*const cpi, - MACROBLOCK *const x, BLOCK_SIZE bsize, int mi_row, - int mi_col, BUFFER_SET *const orig_dst) { - const AV1_COMMON *cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - MACROBLOCKD *const xd = &x->e_mbd; - av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, orig_dst, bsize); - - int64_t total_sse = 0; - for (int plane = 0; plane < num_planes; ++plane) { - const struct macroblock_plane *const p = &x->plane[plane]; - const struct macroblockd_plane *const pd = &xd->plane[plane]; - const BLOCK_SIZE plane_bsize = - get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y); - const int bw = block_size_wide[plane_bsize]; - const int bh = block_size_high[plane_bsize]; - - av1_subtract_plane(x, bsize, plane); - int64_t sse = aom_sum_squares_2d_i16(p->src_diff, bw, bw, bh); - sse = sse << 4; - total_sse += sse; - } - const int skip_mode_ctx = av1_get_skip_mode_context(xd); - rd_stats->dist = rd_stats->sse = total_sse; - rd_stats->rate = x->skip_mode_cost[skip_mode_ctx][1]; - rd_stats->rdcost = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); - - restore_dst_buf(xd, *orig_dst, num_planes); - return 0; -} - -static INLINE int get_ref_mv_offset(PREDICTION_MODE single_mode, - uint8_t ref_mv_idx) { - assert(is_inter_singleref_mode(single_mode)); - int ref_mv_offset; - if (single_mode == NEARESTMV) { - ref_mv_offset = 0; - } else if (single_mode == NEARMV) { - ref_mv_offset = ref_mv_idx + 1; - } else { - ref_mv_offset = -1; - } - return ref_mv_offset; -} - -static INLINE void get_this_mv(int_mv *this_mv, PREDICTION_MODE this_mode, - int ref_idx, int ref_mv_idx, - const MV_REFERENCE_FRAME *ref_frame, - const MB_MODE_INFO_EXT *mbmi_ext) { - const uint8_t ref_frame_type = av1_ref_frame_type(ref_frame); - const int is_comp_pred = ref_frame[1] > INTRA_FRAME; - const PREDICTION_MODE single_mode = - get_single_mode(this_mode, ref_idx, is_comp_pred); - assert(is_inter_singleref_mode(single_mode)); - if (single_mode == NEWMV) { - this_mv->as_int = 
INVALID_MV; - } else if (single_mode == GLOBALMV) { - *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]]; - } else { - assert(single_mode == NEARMV || single_mode == NEARESTMV); - const int ref_mv_offset = get_ref_mv_offset(single_mode, ref_mv_idx); - if (ref_mv_offset < mbmi_ext->ref_mv_count[ref_frame_type]) { - assert(ref_mv_offset >= 0); - if (ref_idx == 0) { - *this_mv = - mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].this_mv; - } else { - *this_mv = - mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_offset].comp_mv; - } - } else { - *this_mv = mbmi_ext->global_mvs[ref_frame[ref_idx]]; - } - } -} - -// This function update the non-new mv for the current prediction mode -static INLINE int build_cur_mv(int_mv *cur_mv, PREDICTION_MODE this_mode, - const AV1_COMMON *cm, const MACROBLOCK *x) { - const MACROBLOCKD *xd = &x->e_mbd; - const MB_MODE_INFO *mbmi = xd->mi[0]; - const int is_comp_pred = has_second_ref(mbmi); - int ret = 1; - for (int i = 0; i < is_comp_pred + 1; ++i) { - int_mv this_mv; - get_this_mv(&this_mv, this_mode, i, mbmi->ref_mv_idx, mbmi->ref_frame, - x->mbmi_ext); - const PREDICTION_MODE single_mode = - get_single_mode(this_mode, i, is_comp_pred); - if (single_mode == NEWMV) { - cur_mv[i] = this_mv; - } else { - ret &= clamp_and_check_mv(cur_mv + i, this_mv, cm, x); - } - } - return ret; -} - -static INLINE int get_drl_cost(const MB_MODE_INFO *mbmi, - const MB_MODE_INFO_EXT *mbmi_ext, - int (*drl_mode_cost0)[2], - int8_t ref_frame_type) { - int cost = 0; - if (mbmi->mode == NEWMV || mbmi->mode == NEW_NEWMV) { - for (int idx = 0; idx < 2; ++idx) { - if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) { - uint8_t drl_ctx = - av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx); - cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != idx]; - if (mbmi->ref_mv_idx == idx) return cost; - } - } - return cost; - } - - if (have_nearmv_in_inter_mode(mbmi->mode)) { - for (int idx = 1; idx < 3; ++idx) { - if (mbmi_ext->ref_mv_count[ref_frame_type] 
> idx + 1) { - uint8_t drl_ctx = - av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx); - cost += drl_mode_cost0[drl_ctx][mbmi->ref_mv_idx != (idx - 1)]; - if (mbmi->ref_mv_idx == (idx - 1)) return cost; - } - } - return cost; - } - return cost; -} - -// Struct for buffers used by compound_type_rd() function. -// For sizes and alignment of these arrays, refer to -// alloc_compound_type_rd_buffers() function. -typedef struct { - uint8_t *pred0; - uint8_t *pred1; - int16_t *residual1; // src - pred1 - int16_t *diff10; // pred1 - pred0 - uint8_t *tmp_best_mask_buf; // backup of the best segmentation mask -} CompoundTypeRdBuffers; - -static int compound_type_rd(const AV1_COMP *const cpi, MACROBLOCK *x, - BLOCK_SIZE bsize, int mi_col, int mi_row, - int_mv *cur_mv, int masked_compound_used, - BUFFER_SET *orig_dst, const BUFFER_SET *tmp_dst, - CompoundTypeRdBuffers *buffers, int *rate_mv, - int64_t *rd, RD_STATS *rd_stats, - int64_t ref_best_rd) { - const AV1_COMMON *cm = &cpi->common; - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = xd->mi[0]; - const PREDICTION_MODE this_mode = mbmi->mode; - const int bw = block_size_wide[bsize]; - int rate_sum, rs2; - int64_t dist_sum; - - int_mv best_mv[2]; - int best_tmp_rate_mv = *rate_mv; - int tmp_skip_txfm_sb; - int64_t tmp_skip_sse_sb; - INTERINTER_COMPOUND_DATA best_compound_data; - best_compound_data.type = COMPOUND_AVERAGE; - uint8_t *preds0[1] = { buffers->pred0 }; - uint8_t *preds1[1] = { buffers->pred1 }; - int strides[1] = { bw }; - int tmp_rate_mv; - const int num_pix = 1 << num_pels_log2_lookup[bsize]; - const int mask_len = 2 * num_pix * sizeof(uint8_t); - COMPOUND_TYPE cur_type; - int best_compmode_interinter_cost = 0; - int calc_pred_masked_compound = 1; - - best_mv[0].as_int = cur_mv[0].as_int; - best_mv[1].as_int = cur_mv[1].as_int; - *rd = INT64_MAX; - for (cur_type = COMPOUND_AVERAGE; cur_type < COMPOUND_TYPES; cur_type++) { - if (cur_type != COMPOUND_AVERAGE && !masked_compound_used) break; - if 
(!is_interinter_compound_used(cur_type, bsize)) continue; - tmp_rate_mv = *rate_mv; - int64_t best_rd_cur = INT64_MAX; - mbmi->interinter_comp.type = cur_type; - int masked_type_cost = 0; - - const int comp_group_idx_ctx = get_comp_group_idx_context(xd); - const int comp_index_ctx = get_comp_index_context(cm, xd); - mbmi->compound_idx = 1; - if (cur_type == COMPOUND_AVERAGE) { - mbmi->comp_group_idx = 0; - if (masked_compound_used) { - masked_type_cost += x->comp_group_idx_cost[comp_group_idx_ctx][0]; - } - masked_type_cost += x->comp_idx_cost[comp_index_ctx][1]; - rs2 = masked_type_cost; - const int64_t mode_rd = RDCOST(x->rdmult, rs2 + rd_stats->rate, 0); - if (mode_rd < ref_best_rd) { - av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, orig_dst, bsize); - int64_t est_rd = - estimate_yrd_for_sb(cpi, bsize, x, &rate_sum, &dist_sum, - &tmp_skip_txfm_sb, &tmp_skip_sse_sb, INT64_MAX); - if (est_rd != INT64_MAX) - best_rd_cur = RDCOST(x->rdmult, rs2 + *rate_mv + rate_sum, dist_sum); - } - // use spare buffer for following compound type try - restore_dst_buf(xd, *tmp_dst, 1); - } else { - mbmi->comp_group_idx = 1; - masked_type_cost += x->comp_group_idx_cost[comp_group_idx_ctx][1]; - masked_type_cost += x->compound_type_cost[bsize][cur_type - 1]; - rs2 = masked_type_cost; - if (x->source_variance > cpi->sf.disable_wedge_search_var_thresh && - *rd / 3 < ref_best_rd) { - best_rd_cur = build_and_cost_compound_type( - cpi, x, cur_mv, bsize, this_mode, &rs2, *rate_mv, orig_dst, - &tmp_rate_mv, preds0, preds1, buffers->residual1, buffers->diff10, - strides, mi_row, mi_col, rd_stats->rate, ref_best_rd, - &calc_pred_masked_compound); - } - } - if (best_rd_cur < *rd) { - *rd = best_rd_cur; - best_compound_data = mbmi->interinter_comp; - if (masked_compound_used && cur_type != COMPOUND_TYPES - 1) { - memcpy(buffers->tmp_best_mask_buf, xd->seg_mask, mask_len); - } - best_compmode_interinter_cost = rs2; - if (have_newmv_in_inter_mode(this_mode)) { - if (cur_type == 
COMPOUND_WEDGE) { - best_tmp_rate_mv = tmp_rate_mv; - best_mv[0].as_int = mbmi->mv[0].as_int; - best_mv[1].as_int = mbmi->mv[1].as_int; - } else { - best_mv[0].as_int = cur_mv[0].as_int; - best_mv[1].as_int = cur_mv[1].as_int; - } - } - } - // reset to original mvs for next iteration - mbmi->mv[0].as_int = cur_mv[0].as_int; - mbmi->mv[1].as_int = cur_mv[1].as_int; - } - if (mbmi->interinter_comp.type != best_compound_data.type) { - mbmi->comp_group_idx = - (best_compound_data.type == COMPOUND_AVERAGE) ? 0 : 1; - mbmi->interinter_comp = best_compound_data; - memcpy(xd->seg_mask, buffers->tmp_best_mask_buf, mask_len); - } - if (have_newmv_in_inter_mode(this_mode)) { - mbmi->mv[0].as_int = best_mv[0].as_int; - mbmi->mv[1].as_int = best_mv[1].as_int; - if (mbmi->interinter_comp.type == COMPOUND_WEDGE) { - rd_stats->rate += best_tmp_rate_mv - *rate_mv; - *rate_mv = best_tmp_rate_mv; - } - } - restore_dst_buf(xd, *orig_dst, 1); - return best_compmode_interinter_cost; -} - -static INLINE int is_single_newmv_valid(HandleInterModeArgs *args, - MB_MODE_INFO *mbmi, - PREDICTION_MODE this_mode) { - for (int ref_idx = 0; ref_idx < 2; ++ref_idx) { - const PREDICTION_MODE single_mode = get_single_mode(this_mode, ref_idx, 1); - const MV_REFERENCE_FRAME ref = mbmi->ref_frame[ref_idx]; - if (single_mode == NEWMV && - args->single_newmv_valid[mbmi->ref_mv_idx][ref] == 0) { - return 0; - } - } - return 1; -} - -static int get_drl_refmv_count(const MACROBLOCK *const x, - const MV_REFERENCE_FRAME *ref_frame, - PREDICTION_MODE mode) { - MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; - const int8_t ref_frame_type = av1_ref_frame_type(ref_frame); - const int has_nearmv = have_nearmv_in_inter_mode(mode) ? 1 : 0; - const int ref_mv_count = mbmi_ext->ref_mv_count[ref_frame_type]; - const int only_newmv = (mode == NEWMV || mode == NEW_NEWMV); - const int has_drl = - (has_nearmv && ref_mv_count > 2) || (only_newmv && ref_mv_count > 1); - const int ref_set = - has_drl ? 
AOMMIN(MAX_REF_MV_SERCH, ref_mv_count - has_nearmv) : 1; - - return ref_set; -} - -typedef struct { - int64_t rd; - int drl_cost; - int rate_mv; - int_mv mv; -} inter_mode_info; - -static int64_t handle_inter_mode(const AV1_COMP *const cpi, MACROBLOCK *x, - BLOCK_SIZE bsize, RD_STATS *rd_stats, - RD_STATS *rd_stats_y, RD_STATS *rd_stats_uv, - int *disable_skip, int mi_row, int mi_col, - HandleInterModeArgs *args, int64_t ref_best_rd, - uint8_t *const tmp_buf, - CompoundTypeRdBuffers *rd_buffers -#if CONFIG_COLLECT_INTER_MODE_RD_STATS - , - TileDataEnc *tile_data, int64_t *best_est_rd, - const int do_tx_search, - InterModesInfo *inter_modes_info -#endif -) { - const AV1_COMMON *cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - MACROBLOCKD *xd = &x->e_mbd; - MB_MODE_INFO *mbmi = xd->mi[0]; - MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; - const int is_comp_pred = has_second_ref(mbmi); - const PREDICTION_MODE this_mode = mbmi->mode; - int i; - int refs[2] = { mbmi->ref_frame[0], - (mbmi->ref_frame[1] < 0 ? 0 : mbmi->ref_frame[1]) }; - int rate_mv = 0; - int64_t rd = INT64_MAX; - - // do first prediction into the destination buffer. Do the next - // prediction into a temporary buffer. Then keep track of which one - // of these currently holds the best predictor, and use the other - // one for future predictions. In the end, copy from tmp_buf to - // dst if necessary. 
- struct macroblockd_plane *p = xd->plane; - BUFFER_SET orig_dst = { - { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf }, - { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride }, - }; - const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE, - tmp_buf + 2 * MAX_SB_SQUARE }, - { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE } }; - - int skip_txfm_sb = 0; - int64_t skip_sse_sb = INT64_MAX; - int16_t mode_ctx; - const int masked_compound_used = is_any_masked_compound_used(bsize) && - cm->seq_params.enable_masked_compound; - int64_t ret_val = INT64_MAX; - const int8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame); - RD_STATS best_rd_stats, best_rd_stats_y, best_rd_stats_uv; - int64_t best_rd = INT64_MAX; - uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE]; - MB_MODE_INFO best_mbmi = *mbmi; - int best_disable_skip; - int best_xskip; - int64_t newmv_ret_val = INT64_MAX; - int_mv backup_mv[2] = { { 0 } }; - int backup_rate_mv = 0; - inter_mode_info mode_info[MAX_REF_MV_SERCH]; - - int comp_idx; - const int search_jnt_comp = is_comp_pred & cm->seq_params.enable_jnt_comp & - (mbmi->mode != GLOBAL_GLOBALMV); - - // TODO(jingning): This should be deprecated shortly. - const int has_nearmv = have_nearmv_in_inter_mode(mbmi->mode) ? 
1 : 0; - const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode); - - for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) { - mode_info[ref_mv_idx].mv.as_int = INVALID_MV; - mode_info[ref_mv_idx].rd = INT64_MAX; - - if (cpi->sf.reduce_inter_modes && ref_mv_idx > 0) { - if (mbmi->ref_frame[0] == LAST2_FRAME || - mbmi->ref_frame[0] == LAST3_FRAME || - mbmi->ref_frame[1] == LAST2_FRAME || - mbmi->ref_frame[1] == LAST3_FRAME) { - if (mbmi_ext->ref_mv_stack[ref_frame_type][ref_mv_idx + has_nearmv] - .weight < REF_CAT_LEVEL) { - continue; - } - } - } - - av1_init_rd_stats(rd_stats); - - mbmi->interinter_comp.type = COMPOUND_AVERAGE; - mbmi->comp_group_idx = 0; - mbmi->compound_idx = 1; - if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME; - - mode_ctx = - av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame); - - mbmi->num_proj_ref = 0; - mbmi->motion_mode = SIMPLE_TRANSLATION; - mbmi->ref_mv_idx = ref_mv_idx; - - if (is_comp_pred && (!is_single_newmv_valid(args, mbmi, this_mode))) { - continue; - } - - rd_stats->rate += args->ref_frame_cost + args->single_comp_cost; - const int drl_cost = - get_drl_cost(mbmi, mbmi_ext, x->drl_mode_cost0, ref_frame_type); - rd_stats->rate += drl_cost; - mode_info[ref_mv_idx].drl_cost = drl_cost; - - if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd && - mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) { - continue; - } - - int64_t best_rd2 = INT64_MAX; - - const RD_STATS backup_rd_stats = *rd_stats; - // If !search_jnt_comp, we need to force mbmi->compound_idx = 1. 
- for (comp_idx = 1; comp_idx >= !search_jnt_comp; --comp_idx) { - int rs = 0; - int compmode_interinter_cost = 0; - mbmi->compound_idx = comp_idx; - if (is_comp_pred && comp_idx == 0) { - *rd_stats = backup_rd_stats; - mbmi->interinter_comp.type = COMPOUND_AVERAGE; - if (mbmi->ref_frame[1] == INTRA_FRAME) mbmi->ref_frame[1] = NONE_FRAME; - mbmi->num_proj_ref = 0; - mbmi->motion_mode = SIMPLE_TRANSLATION; - mbmi->comp_group_idx = 0; - - const int comp_group_idx_ctx = get_comp_group_idx_context(xd); - const int comp_index_ctx = get_comp_index_context(cm, xd); - if (masked_compound_used) { - compmode_interinter_cost += - x->comp_group_idx_cost[comp_group_idx_ctx][0]; - } - compmode_interinter_cost += x->comp_idx_cost[comp_index_ctx][0]; - } - - int_mv cur_mv[2]; - if (!build_cur_mv(cur_mv, this_mode, cm, x)) { - continue; - } - if (have_newmv_in_inter_mode(this_mode)) { - if (comp_idx == 0) { - cur_mv[0] = backup_mv[0]; - cur_mv[1] = backup_mv[1]; - rate_mv = backup_rate_mv; - } - - // when jnt_comp_skip_mv_search flag is on, new mv will be searched once - if (!(search_jnt_comp && cpi->sf.jnt_comp_skip_mv_search && - comp_idx == 0)) { - newmv_ret_val = handle_newmv(cpi, x, bsize, cur_mv, mi_row, mi_col, - &rate_mv, args); - - // Store cur_mv and rate_mv so that they can be restored in the next - // iteration of the loop - backup_mv[0] = cur_mv[0]; - backup_mv[1] = cur_mv[1]; - backup_rate_mv = rate_mv; - } - - if (newmv_ret_val != 0) { - continue; - } else { - rd_stats->rate += rate_mv; - } - - if (cpi->sf.skip_repeated_newmv) { - if (!is_comp_pred && this_mode == NEWMV && ref_mv_idx > 0) { - int skip = 0; - int this_rate_mv = 0; - for (i = 0; i < ref_mv_idx; ++i) { - // Check if the motion search result same as previous results - if (cur_mv[0].as_int == args->single_newmv[i][refs[0]].as_int) { - // If the compared mode has no valid rd, it is unlikely this - // mode will be the best mode - if (mode_info[i].rd == INT64_MAX) { - skip = 1; - break; - } - // Compare the 
cost difference including drl cost and mv cost - if (mode_info[i].mv.as_int != INVALID_MV) { - const int compare_cost = - mode_info[i].rate_mv + mode_info[i].drl_cost; - const int_mv ref_mv = av1_get_ref_mv(x, 0); - this_rate_mv = av1_mv_bit_cost(&mode_info[i].mv.as_mv, - &ref_mv.as_mv, x->nmvjointcost, - x->mvcost, MV_COST_WEIGHT); - const int this_cost = this_rate_mv + drl_cost; - - if (compare_cost < this_cost) { - skip = 1; - break; - } else { - // If the cost is less than current best result, make this - // the best and update corresponding variables - if (best_mbmi.ref_mv_idx == i) { - assert(best_rd != INT64_MAX); - best_mbmi.ref_mv_idx = ref_mv_idx; - best_rd_stats.rate += this_cost - compare_cost; - best_rd = RDCOST(x->rdmult, best_rd_stats.rate, - best_rd_stats.dist); - if (best_rd < ref_best_rd) ref_best_rd = best_rd; - - skip = 1; - break; - } - } - } - } - } - if (skip) { - args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = - args->modelled_rd[this_mode][i][refs[0]]; - args->simple_rd[this_mode][ref_mv_idx][refs[0]] = - args->simple_rd[this_mode][i][refs[0]]; - mode_info[ref_mv_idx].rd = mode_info[i].rd; - mode_info[ref_mv_idx].rate_mv = this_rate_mv; - mode_info[ref_mv_idx].mv.as_int = mode_info[i].mv.as_int; - - restore_dst_buf(xd, orig_dst, num_planes); - continue; - } - } - } - } - for (i = 0; i < is_comp_pred + 1; ++i) { - mbmi->mv[i].as_int = cur_mv[i].as_int; - } - const int ref_mv_cost = cost_mv_ref(x, this_mode, mode_ctx); -#if USE_DISCOUNT_NEWMV_TEST - // We don't include the cost of the second reference here, because there - // are only three options: Last/Golden, ARF/Last or Golden/ARF, or in - // other words if you present them in that order, the second one is always - // known if the first is known. - // - // Under some circumstances we discount the cost of new mv mode to - // encourage initiation of a motion field. - if (discount_newmv_test(cpi, x, this_mode, mbmi->mv[0])) { - // discount_newmv_test only applies discount on NEWMV mode. 
- assert(this_mode == NEWMV); - rd_stats->rate += AOMMIN(cost_mv_ref(x, this_mode, mode_ctx), - cost_mv_ref(x, NEARESTMV, mode_ctx)); - } else { - rd_stats->rate += ref_mv_cost; - } -#else - rd_stats->rate += ref_mv_cost; -#endif - - if (RDCOST(x->rdmult, rd_stats->rate, 0) > ref_best_rd && - mbmi->mode != NEARESTMV && mbmi->mode != NEAREST_NEARESTMV) { - continue; - } - - int skip_build_pred = 0; - if (is_comp_pred && comp_idx) { - // Find matching interp filter or set to default interp filter - const int need_search = - av1_is_interp_needed(xd) && av1_is_interp_search_needed(xd); - int match_found = -1; - const InterpFilter assign_filter = cm->interp_filter; - if (cpi->sf.skip_repeat_interpolation_filter_search && need_search) { - match_found = find_interp_filter_in_stats(x, mbmi); - } - if (!need_search || match_found == -1) { - set_default_interp_filters(mbmi, assign_filter); - } - - int64_t best_rd_compound; - compmode_interinter_cost = compound_type_rd( - cpi, x, bsize, mi_col, mi_row, cur_mv, masked_compound_used, - &orig_dst, &tmp_dst, rd_buffers, &rate_mv, &best_rd_compound, - rd_stats, ref_best_rd); - if (ref_best_rd < INT64_MAX && best_rd_compound / 3 > ref_best_rd) { - restore_dst_buf(xd, orig_dst, num_planes); - continue; - } - // No need to call av1_build_inter_predictors_sby if - // COMPOUND_AVERAGE is selected because it is the first - // candidate in compound_type_rd, and the following - // compound types searching uses tmp_dst buffer - if (mbmi->interinter_comp.type == COMPOUND_AVERAGE) { - if (num_planes > 1) - av1_build_inter_predictors_sbuv(cm, xd, mi_row, mi_col, &orig_dst, - bsize); - skip_build_pred = 1; - } - } - - ret_val = interpolation_filter_search( - x, cpi, bsize, mi_row, mi_col, &tmp_dst, &orig_dst, - args->single_filter, &rd, &rs, &skip_txfm_sb, &skip_sse_sb, - skip_build_pred, args, ref_best_rd); - if (args->modelled_rd != NULL && !is_comp_pred) { - args->modelled_rd[this_mode][ref_mv_idx][refs[0]] = rd; - } - if (ret_val != 0) { - 
restore_dst_buf(xd, orig_dst, num_planes); - continue; - } else if (cpi->sf.model_based_post_interp_filter_breakout && - ref_best_rd != INT64_MAX && (rd >> 3) * 3 > ref_best_rd) { - restore_dst_buf(xd, orig_dst, num_planes); - if ((rd >> 3) * 2 > ref_best_rd) break; - continue; - } - - if (search_jnt_comp) { - // if 1/2 model rd is larger than best_rd in jnt_comp mode, - // use jnt_comp mode, save additional search - if ((rd >> 3) * 4 > best_rd) { - restore_dst_buf(xd, orig_dst, num_planes); - continue; - } - } - - if (!is_comp_pred) - args->single_filter[this_mode][refs[0]] = - av1_extract_interp_filter(mbmi->interp_filters, 0); - - if (args->modelled_rd != NULL) { - if (is_comp_pred) { - const int mode0 = compound_ref0_mode(this_mode); - const int mode1 = compound_ref1_mode(this_mode); - const int64_t mrd = - AOMMIN(args->modelled_rd[mode0][ref_mv_idx][refs[0]], - args->modelled_rd[mode1][ref_mv_idx][refs[1]]); - if ((rd >> 3) * 6 > mrd && ref_best_rd < INT64_MAX) { - restore_dst_buf(xd, orig_dst, num_planes); - continue; - } - } - } - rd_stats->rate += compmode_interinter_cost; - - if (search_jnt_comp && cpi->sf.jnt_comp_fast_tx_search && comp_idx == 0) { - // TODO(chengchen): this speed feature introduces big loss. - // Need better estimation of rate distortion. 
- int dummy_rate; - int64_t dummy_dist; - int plane_rate[MAX_MB_PLANE] = { 0 }; - int64_t plane_sse[MAX_MB_PLANE] = { 0 }; - int64_t plane_dist[MAX_MB_PLANE] = { 0 }; - - model_rd_sb_fn[MODELRD_TYPE_JNT_COMPOUND]( - cpi, bsize, x, xd, 0, num_planes - 1, mi_row, mi_col, &dummy_rate, - &dummy_dist, &skip_txfm_sb, &skip_sse_sb, plane_rate, plane_sse, - plane_dist); - - rd_stats->rate += rs; - rd_stats->rate += plane_rate[0] + plane_rate[1] + plane_rate[2]; - rd_stats_y->rate = plane_rate[0]; - rd_stats_uv->rate = plane_rate[1] + plane_rate[2]; - rd_stats->sse = plane_sse[0] + plane_sse[1] + plane_sse[2]; - rd_stats_y->sse = plane_sse[0]; - rd_stats_uv->sse = plane_sse[1] + plane_sse[2]; - rd_stats->dist = plane_dist[0] + plane_dist[1] + plane_dist[2]; - rd_stats_y->dist = plane_dist[0]; - rd_stats_uv->dist = plane_dist[1] + plane_dist[2]; - } else { -#if CONFIG_COLLECT_INTER_MODE_RD_STATS - ret_val = motion_mode_rd( - cpi, x, bsize, rd_stats, rd_stats_y, rd_stats_uv, disable_skip, - mi_row, mi_col, args, ref_best_rd, refs, &rate_mv, &orig_dst, - tile_data, best_est_rd, do_tx_search, inter_modes_info); -#else - ret_val = motion_mode_rd(cpi, x, bsize, rd_stats, rd_stats_y, - rd_stats_uv, disable_skip, mi_row, mi_col, - args, ref_best_rd, refs, &rate_mv, &orig_dst); -#endif - } - mode_info[ref_mv_idx].mv.as_int = mbmi->mv[0].as_int; - mode_info[ref_mv_idx].rate_mv = rate_mv; - if (ret_val != INT64_MAX) { - int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); - mode_info[ref_mv_idx].rd = tmp_rd; - if (tmp_rd < best_rd) { - best_rd_stats = *rd_stats; - best_rd_stats_y = *rd_stats_y; - best_rd_stats_uv = *rd_stats_uv; - best_rd = tmp_rd; - best_mbmi = *mbmi; - best_disable_skip = *disable_skip; - best_xskip = x->skip; - memcpy(best_blk_skip, x->blk_skip, - sizeof(best_blk_skip[0]) * xd->n4_h * xd->n4_w); - } - - if (tmp_rd < best_rd2) { - best_rd2 = tmp_rd; - } - - if (tmp_rd < ref_best_rd) { - ref_best_rd = tmp_rd; - } - } - restore_dst_buf(xd, orig_dst, 
num_planes); - } - } - - if (best_rd == INT64_MAX) return INT64_MAX; - - // re-instate status of the best choice - *rd_stats = best_rd_stats; - *rd_stats_y = best_rd_stats_y; - *rd_stats_uv = best_rd_stats_uv; - *mbmi = best_mbmi; - *disable_skip = best_disable_skip; - x->skip = best_xskip; - assert(IMPLIES(mbmi->comp_group_idx == 1, - mbmi->interinter_comp.type != COMPOUND_AVERAGE)); - memcpy(x->blk_skip, best_blk_skip, - sizeof(best_blk_skip[0]) * xd->n4_h * xd->n4_w); - - return RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); -} - -static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, - RD_STATS *rd_cost, BLOCK_SIZE bsize, - int64_t best_rd) { - const AV1_COMMON *const cm = &cpi->common; - if (!av1_allow_intrabc(cm)) return INT64_MAX; - const int num_planes = av1_num_planes(cm); - - MACROBLOCKD *const xd = &x->e_mbd; - const TileInfo *tile = &xd->tile; - MB_MODE_INFO *mbmi = xd->mi[0]; - const int mi_row = -xd->mb_to_top_edge / (8 * MI_SIZE); - const int mi_col = -xd->mb_to_left_edge / (8 * MI_SIZE); - const int w = block_size_wide[bsize]; - const int h = block_size_high[bsize]; - const int sb_row = mi_row >> cm->seq_params.mib_size_log2; - const int sb_col = mi_col >> cm->seq_params.mib_size_log2; - - MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; - MV_REFERENCE_FRAME ref_frame = INTRA_FRAME; - av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count, - mbmi_ext->ref_mv_stack, NULL, mbmi_ext->global_mvs, mi_row, - mi_col, mbmi_ext->mode_context); - - int_mv nearestmv, nearmv; - av1_find_best_ref_mvs_from_stack(0, mbmi_ext, ref_frame, &nearestmv, &nearmv, - 0); - - if (nearestmv.as_int == INVALID_MV) { - nearestmv.as_int = 0; - } - if (nearmv.as_int == INVALID_MV) { - nearmv.as_int = 0; - } - - int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv; - if (dv_ref.as_int == 0) - av1_find_ref_dv(&dv_ref, tile, cm->seq_params.mib_size, mi_row, mi_col); - // Ref DV should not have sub-pel. 
- assert((dv_ref.as_mv.col & 7) == 0); - assert((dv_ref.as_mv.row & 7) == 0); - mbmi_ext->ref_mv_stack[INTRA_FRAME][0].this_mv = dv_ref; - - struct buf_2d yv12_mb[MAX_MB_PLANE]; - av1_setup_pred_block(xd, yv12_mb, xd->cur_buf, mi_row, mi_col, NULL, NULL, - num_planes); - for (int i = 0; i < num_planes; ++i) { - xd->plane[i].pre[0] = yv12_mb[i]; - } - - enum IntrabcMotionDirection { - IBC_MOTION_ABOVE, - IBC_MOTION_LEFT, - IBC_MOTION_DIRECTIONS - }; - - MB_MODE_INFO best_mbmi = *mbmi; - RD_STATS best_rdcost = *rd_cost; - int best_skip = x->skip; - - uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE] = { 0 }; - for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE; - dir < IBC_MOTION_DIRECTIONS; ++dir) { - const MvLimits tmp_mv_limits = x->mv_limits; - switch (dir) { - case IBC_MOTION_ABOVE: - x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE; - x->mv_limits.col_max = (tile->mi_col_end - mi_col) * MI_SIZE - w; - x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE; - x->mv_limits.row_max = - (sb_row * cm->seq_params.mib_size - mi_row) * MI_SIZE - h; - break; - case IBC_MOTION_LEFT: - x->mv_limits.col_min = (tile->mi_col_start - mi_col) * MI_SIZE; - x->mv_limits.col_max = - (sb_col * cm->seq_params.mib_size - mi_col) * MI_SIZE - w; - // TODO(aconverse@google.com): Minimize the overlap between above and - // left areas. 
- x->mv_limits.row_min = (tile->mi_row_start - mi_row) * MI_SIZE; - int bottom_coded_mi_edge = - AOMMIN((sb_row + 1) * cm->seq_params.mib_size, tile->mi_row_end); - x->mv_limits.row_max = (bottom_coded_mi_edge - mi_row) * MI_SIZE - h; - break; - default: assert(0); - } - assert(x->mv_limits.col_min >= tmp_mv_limits.col_min); - assert(x->mv_limits.col_max <= tmp_mv_limits.col_max); - assert(x->mv_limits.row_min >= tmp_mv_limits.row_min); - assert(x->mv_limits.row_max <= tmp_mv_limits.row_max); - av1_set_mv_search_range(&x->mv_limits, &dv_ref.as_mv); - - if (x->mv_limits.col_max < x->mv_limits.col_min || - x->mv_limits.row_max < x->mv_limits.row_min) { - x->mv_limits = tmp_mv_limits; - continue; - } - - int step_param = cpi->mv_step_param; - MV mvp_full = dv_ref.as_mv; - mvp_full.col >>= 3; - mvp_full.row >>= 3; - int sadpb = x->sadperbit16; - int cost_list[5]; - int bestsme = av1_full_pixel_search( - cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, 0, - sadpb, cond_cost_list(cpi, cost_list), &dv_ref.as_mv, INT_MAX, 1, - (MI_SIZE * mi_col), (MI_SIZE * mi_row), 1); - - x->mv_limits = tmp_mv_limits; - if (bestsme == INT_MAX) continue; - mvp_full = x->best_mv.as_mv; - MV dv = { .row = mvp_full.row * 8, .col = mvp_full.col * 8 }; - if (mv_check_bounds(&x->mv_limits, &dv)) continue; - if (!av1_is_dv_valid(dv, cm, xd, mi_row, mi_col, bsize, - cm->seq_params.mib_size_log2)) - continue; - - // DV should not have sub-pel. 
- assert((dv.col & 7) == 0); - assert((dv.row & 7) == 0); - memset(&mbmi->palette_mode_info, 0, sizeof(mbmi->palette_mode_info)); - mbmi->filter_intra_mode_info.use_filter_intra = 0; - mbmi->use_intrabc = 1; - mbmi->mode = DC_PRED; - mbmi->uv_mode = UV_DC_PRED; - mbmi->motion_mode = SIMPLE_TRANSLATION; - mbmi->mv[0].as_mv = dv; - mbmi->interp_filters = av1_broadcast_interp_filter(BILINEAR); - mbmi->skip = 0; - x->skip = 0; - av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize); - - int *dvcost[2] = { (int *)&cpi->dv_cost[0][MV_MAX], - (int *)&cpi->dv_cost[1][MV_MAX] }; - // TODO(aconverse@google.com): The full motion field defining discount - // in MV_COST_WEIGHT is too large. Explore other values. - int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, cpi->dv_joint_cost, - dvcost, MV_COST_WEIGHT_SUB); - const int rate_mode = x->intrabc_cost[1]; - RD_STATS rd_stats, rd_stats_uv; - av1_subtract_plane(x, bsize, 0); - if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) { - // Intrabc - select_tx_type_yrd(cpi, x, &rd_stats, bsize, mi_row, mi_col, INT64_MAX); - } else { - super_block_yrd(cpi, x, &rd_stats, bsize, INT64_MAX); - memset(mbmi->inter_tx_size, mbmi->tx_size, sizeof(mbmi->inter_tx_size)); - for (int i = 0; i < xd->n4_h * xd->n4_w; ++i) - set_blk_skip(x, 0, i, rd_stats.skip); - } - if (num_planes > 1) { - super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX); - av1_merge_rd_stats(&rd_stats, &rd_stats_uv); - } -#if CONFIG_RD_DEBUG - mbmi->rd_stats = rd_stats; -#endif - - const int skip_ctx = av1_get_skip_context(xd); - - RD_STATS rdc_noskip; - av1_init_rd_stats(&rdc_noskip); - rdc_noskip.rate = - rate_mode + rate_mv + rd_stats.rate + x->skip_cost[skip_ctx][0]; - rdc_noskip.dist = rd_stats.dist; - rdc_noskip.rdcost = RDCOST(x->rdmult, rdc_noskip.rate, rdc_noskip.dist); - if (rdc_noskip.rdcost < best_rd) { - best_rd = rdc_noskip.rdcost; - best_mbmi = *mbmi; - best_skip = x->skip; - best_rdcost = rdc_noskip; - memcpy(best_blk_skip, 
x->blk_skip, - sizeof(x->blk_skip[0]) * xd->n4_h * xd->n4_w); - } - - if (!xd->lossless[mbmi->segment_id]) { - x->skip = 1; - mbmi->skip = 1; - RD_STATS rdc_skip; - av1_init_rd_stats(&rdc_skip); - rdc_skip.rate = rate_mode + rate_mv + x->skip_cost[skip_ctx][1]; - rdc_skip.dist = rd_stats.sse; - rdc_skip.rdcost = RDCOST(x->rdmult, rdc_skip.rate, rdc_skip.dist); - if (rdc_skip.rdcost < best_rd) { - best_rd = rdc_skip.rdcost; - best_mbmi = *mbmi; - best_skip = x->skip; - best_rdcost = rdc_skip; - memcpy(best_blk_skip, x->blk_skip, - sizeof(x->blk_skip[0]) * xd->n4_h * xd->n4_w); - } - } - } - *mbmi = best_mbmi; - *rd_cost = best_rdcost; - x->skip = best_skip; - memcpy(x->blk_skip, best_blk_skip, - sizeof(x->blk_skip[0]) * xd->n4_h * xd->n4_w); - return best_rd; -} - -void av1_rd_pick_intra_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x, int mi_row, - int mi_col, RD_STATS *rd_cost, BLOCK_SIZE bsize, - PICK_MODE_CONTEXT *ctx, int64_t best_rd) { - const AV1_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - const int num_planes = av1_num_planes(cm); - int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0; - int y_skip = 0, uv_skip = 0; - int64_t dist_y = 0, dist_uv = 0; - TX_SIZE max_uv_tx_size; - - ctx->skip = 0; - mbmi->ref_frame[0] = INTRA_FRAME; - mbmi->ref_frame[1] = NONE_FRAME; - mbmi->use_intrabc = 0; - mbmi->mv[0].as_int = 0; - - const int64_t intra_yrd = - rd_pick_intra_sby_mode(cpi, x, mi_row, mi_col, &rate_y, &rate_y_tokenonly, - &dist_y, &y_skip, bsize, best_rd, ctx); - - if (intra_yrd < best_rd) { - // Only store reconstructed luma when there's chroma RDO. When there's no - // chroma RDO, the reconstructed luma will be stored in encode_superblock(). 
- xd->cfl.is_chroma_reference = - is_chroma_reference(mi_row, mi_col, bsize, cm->seq_params.subsampling_x, - cm->seq_params.subsampling_y); - xd->cfl.store_y = store_cfl_required_rdo(cm, x); - if (xd->cfl.store_y) { - // Restore reconstructed luma values. - memcpy(x->blk_skip, ctx->blk_skip, - sizeof(x->blk_skip[0]) * ctx->num_4x4_blk); - av1_encode_intra_block_plane(cpi, x, bsize, AOM_PLANE_Y, - cpi->optimize_seg_arr[mbmi->segment_id], - mi_row, mi_col); - xd->cfl.store_y = 0; - } - if (num_planes > 1) { - max_uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd); - init_sbuv_mode(mbmi); - if (!x->skip_chroma_rd) - rd_pick_intra_sbuv_mode(cpi, x, &rate_uv, &rate_uv_tokenonly, &dist_uv, - &uv_skip, bsize, max_uv_tx_size); - } - - if (y_skip && (uv_skip || x->skip_chroma_rd)) { - rd_cost->rate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly + - x->skip_cost[av1_get_skip_context(xd)][1]; - rd_cost->dist = dist_y + dist_uv; - } else { - rd_cost->rate = - rate_y + rate_uv + x->skip_cost[av1_get_skip_context(xd)][0]; - rd_cost->dist = dist_y + dist_uv; - } - rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist); - } else { - rd_cost->rate = INT_MAX; - } - - if (rd_cost->rate != INT_MAX && rd_cost->rdcost < best_rd) - best_rd = rd_cost->rdcost; - if (rd_pick_intrabc_mode_sb(cpi, x, rd_cost, bsize, best_rd) < best_rd) { - ctx->skip = x->skip; - memcpy(ctx->blk_skip, x->blk_skip, - sizeof(x->blk_skip[0]) * ctx->num_4x4_blk); - assert(rd_cost->rate != INT_MAX); - } - if (rd_cost->rate == INT_MAX) return; - - ctx->mic = *xd->mi[0]; - ctx->mbmi_ext = *x->mbmi_ext; -} - -static void restore_uv_color_map(const AV1_COMP *const cpi, MACROBLOCK *x) { - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; - const BLOCK_SIZE bsize = mbmi->sb_type; - int src_stride = x->plane[1].src.stride; - const uint8_t *const src_u = x->plane[1].src.buf; - const uint8_t *const src_v = 
x->plane[2].src.buf; - int *const data = x->palette_buffer->kmeans_data_buf; - int centroids[2 * PALETTE_MAX_SIZE]; - uint8_t *const color_map = xd->plane[1].color_index_map; - int r, c; - const uint16_t *const src_u16 = CONVERT_TO_SHORTPTR(src_u); - const uint16_t *const src_v16 = CONVERT_TO_SHORTPTR(src_v); - int plane_block_width, plane_block_height, rows, cols; - av1_get_block_dimensions(bsize, 1, xd, &plane_block_width, - &plane_block_height, &rows, &cols); - - for (r = 0; r < rows; ++r) { - for (c = 0; c < cols; ++c) { - if (cpi->common.seq_params.use_highbitdepth) { - data[(r * cols + c) * 2] = src_u16[r * src_stride + c]; - data[(r * cols + c) * 2 + 1] = src_v16[r * src_stride + c]; - } else { - data[(r * cols + c) * 2] = src_u[r * src_stride + c]; - data[(r * cols + c) * 2 + 1] = src_v[r * src_stride + c]; - } - } - } - - for (r = 1; r < 3; ++r) { - for (c = 0; c < pmi->palette_size[1]; ++c) { - centroids[c * 2 + r - 1] = pmi->palette_colors[r * PALETTE_MAX_SIZE + c]; - } - } - - av1_calc_indices(data, centroids, color_map, rows * cols, - pmi->palette_size[1], 2); - extend_palette_color_map(color_map, cols, rows, plane_block_width, - plane_block_height); -} - -static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x, - const MACROBLOCKD *xd, int mi_row, - int mi_col, const uint8_t *above, - int above_stride, const uint8_t *left, - int left_stride); - -static const int ref_frame_flag_list[REF_FRAMES] = { 0, - AOM_LAST_FLAG, - AOM_LAST2_FLAG, - AOM_LAST3_FLAG, - AOM_GOLD_FLAG, - AOM_BWD_FLAG, - AOM_ALT2_FLAG, - AOM_ALT_FLAG }; - -static void rd_pick_skip_mode(RD_STATS *rd_cost, - InterModeSearchState *search_state, - const AV1_COMP *const cpi, MACROBLOCK *const x, - BLOCK_SIZE bsize, int mi_row, int mi_col, - struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) { - const AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - - 
x->compound_idx = 1; // COMPOUND_AVERAGE - RD_STATS skip_mode_rd_stats; - av1_invalid_rd_stats(&skip_mode_rd_stats); - - if (cm->ref_frame_idx_0 == INVALID_IDX || - cm->ref_frame_idx_1 == INVALID_IDX) { - return; - } - - const MV_REFERENCE_FRAME ref_frame = LAST_FRAME + cm->ref_frame_idx_0; - const MV_REFERENCE_FRAME second_ref_frame = LAST_FRAME + cm->ref_frame_idx_1; - const PREDICTION_MODE this_mode = NEAREST_NEARESTMV; - const int mode_index = - get_prediction_mode_idx(this_mode, ref_frame, second_ref_frame); - - if (mode_index == -1) { - return; - } - - mbmi->mode = this_mode; - mbmi->uv_mode = UV_DC_PRED; - mbmi->ref_frame[0] = ref_frame; - mbmi->ref_frame[1] = second_ref_frame; - const uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame); - if (x->mbmi_ext->ref_mv_count[ref_frame_type] == UINT8_MAX) { - if (x->mbmi_ext->ref_mv_count[ref_frame] == UINT8_MAX || - x->mbmi_ext->ref_mv_count[second_ref_frame] == UINT8_MAX) { - return; - } - MB_MODE_INFO_EXT *mbmi_ext = x->mbmi_ext; - av1_find_mv_refs(cm, xd, mbmi, ref_frame_type, mbmi_ext->ref_mv_count, - mbmi_ext->ref_mv_stack, NULL, mbmi_ext->global_mvs, mi_row, - mi_col, mbmi_ext->mode_context); - } - - assert(this_mode == NEAREST_NEARESTMV); - if (!build_cur_mv(mbmi->mv, this_mode, cm, x)) { - return; - } - - mbmi->filter_intra_mode_info.use_filter_intra = 0; - mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1); - mbmi->comp_group_idx = 0; - mbmi->compound_idx = x->compound_idx; - mbmi->interinter_comp.type = COMPOUND_AVERAGE; - mbmi->motion_mode = SIMPLE_TRANSLATION; - mbmi->ref_mv_idx = 0; - mbmi->skip_mode = mbmi->skip = 1; - - set_default_interp_filters(mbmi, cm->interp_filter); - - set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); - for (int i = 0; i < num_planes; i++) { - xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i]; - xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i]; - } - - BUFFER_SET orig_dst; - for (int i = 0; i < num_planes; i++) { - orig_dst.plane[i] = 
xd->plane[i].dst.buf; - orig_dst.stride[i] = xd->plane[i].dst.stride; - } - - // Obtain the rdcost for skip_mode. - skip_mode_rd(&skip_mode_rd_stats, cpi, x, bsize, mi_row, mi_col, &orig_dst); - - // Compare the use of skip_mode with the best intra/inter mode obtained. - const int skip_mode_ctx = av1_get_skip_mode_context(xd); - const int64_t best_intra_inter_mode_cost = - (rd_cost->dist < INT64_MAX && rd_cost->rate < INT32_MAX) - ? RDCOST(x->rdmult, - rd_cost->rate + x->skip_mode_cost[skip_mode_ctx][0], - rd_cost->dist) - : INT64_MAX; - - if (skip_mode_rd_stats.rdcost <= best_intra_inter_mode_cost) { - assert(mode_index != -1); - search_state->best_mbmode.skip_mode = 1; - search_state->best_mbmode = *mbmi; - - search_state->best_mbmode.skip_mode = search_state->best_mbmode.skip = 1; - search_state->best_mbmode.mode = NEAREST_NEARESTMV; - search_state->best_mbmode.ref_frame[0] = mbmi->ref_frame[0]; - search_state->best_mbmode.ref_frame[1] = mbmi->ref_frame[1]; - search_state->best_mbmode.mv[0].as_int = mbmi->mv[0].as_int; - search_state->best_mbmode.mv[1].as_int = mbmi->mv[1].as_int; - search_state->best_mbmode.ref_mv_idx = 0; - - // Set up tx_size related variables for skip-specific loop filtering. - search_state->best_mbmode.tx_size = - block_signals_txsize(bsize) ? tx_size_from_tx_mode(bsize, cm->tx_mode) - : max_txsize_rect_lookup[bsize]; - memset(search_state->best_mbmode.inter_tx_size, - search_state->best_mbmode.tx_size, - sizeof(search_state->best_mbmode.inter_tx_size)); - set_txfm_ctxs(search_state->best_mbmode.tx_size, xd->n4_w, xd->n4_h, - search_state->best_mbmode.skip && is_inter_block(mbmi), xd); - - // Set up color-related variables for skip mode. 
- search_state->best_mbmode.uv_mode = UV_DC_PRED; - search_state->best_mbmode.palette_mode_info.palette_size[0] = 0; - search_state->best_mbmode.palette_mode_info.palette_size[1] = 0; - - search_state->best_mbmode.comp_group_idx = 0; - search_state->best_mbmode.compound_idx = x->compound_idx; - search_state->best_mbmode.interinter_comp.type = COMPOUND_AVERAGE; - search_state->best_mbmode.motion_mode = SIMPLE_TRANSLATION; - - search_state->best_mbmode.interintra_mode = - (INTERINTRA_MODE)(II_DC_PRED - 1); - search_state->best_mbmode.filter_intra_mode_info.use_filter_intra = 0; - - set_default_interp_filters(&search_state->best_mbmode, cm->interp_filter); - - search_state->best_mode_index = mode_index; - - // Update rd_cost - rd_cost->rate = skip_mode_rd_stats.rate; - rd_cost->dist = rd_cost->sse = skip_mode_rd_stats.dist; - rd_cost->rdcost = skip_mode_rd_stats.rdcost; - - search_state->best_rd = rd_cost->rdcost; - search_state->best_skip2 = 1; - search_state->best_mode_skippable = (skip_mode_rd_stats.sse == 0); - - x->skip = 1; - } -} - -// speed feature: fast intra/inter transform type search -// Used for speed >= 2 -// When this speed feature is on, in rd mode search, only DCT is used. -// After the mode is determined, this function is called, to select -// transform types and get accurate rdcost. 
-static void sf_refine_fast_tx_type_search( - const AV1_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, - RD_STATS *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, - int best_mode_index, MB_MODE_INFO *best_mbmode, - struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE], int best_rate_y, - int best_rate_uv, int *best_skip2) { - const AV1_COMMON *const cm = &cpi->common; - const SPEED_FEATURES *const sf = &cpi->sf; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - const int num_planes = av1_num_planes(cm); - - if (xd->lossless[mbmi->segment_id] == 0 && best_mode_index >= 0 && - ((sf->tx_type_search.fast_inter_tx_type_search == 1 && - is_inter_mode(best_mbmode->mode)) || - (sf->tx_type_search.fast_intra_tx_type_search == 1 && - !is_inter_mode(best_mbmode->mode)))) { - int skip_blk = 0; - RD_STATS rd_stats_y, rd_stats_uv; - - x->use_default_inter_tx_type = 0; - x->use_default_intra_tx_type = 0; - - *mbmi = *best_mbmode; - - set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); - - // Select prediction reference frames. 
- for (int i = 0; i < num_planes; i++) { - xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i]; - if (has_second_ref(mbmi)) - xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i]; - } - - if (is_inter_mode(mbmi->mode)) { - av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize); - if (mbmi->motion_mode == OBMC_CAUSAL) - av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col); - - av1_subtract_plane(x, bsize, 0); - if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id]) { - // av1_rd_pick_inter_mode_sb - select_tx_type_yrd(cpi, x, &rd_stats_y, bsize, mi_row, mi_col, - INT64_MAX); - assert(rd_stats_y.rate != INT_MAX); - } else { - super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX); - memset(mbmi->inter_tx_size, mbmi->tx_size, sizeof(mbmi->inter_tx_size)); - for (int i = 0; i < xd->n4_h * xd->n4_w; ++i) - set_blk_skip(x, 0, i, rd_stats_y.skip); - } - if (num_planes > 1) { - inter_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX, INT64_MAX, - FTXS_NONE); - } else { - av1_init_rd_stats(&rd_stats_uv); - } - } else { - super_block_yrd(cpi, x, &rd_stats_y, bsize, INT64_MAX); - if (num_planes > 1) { - super_block_uvrd(cpi, x, &rd_stats_uv, bsize, INT64_MAX); - } else { - av1_init_rd_stats(&rd_stats_uv); - } - } - - if (RDCOST(x->rdmult, rd_stats_y.rate + rd_stats_uv.rate, - (rd_stats_y.dist + rd_stats_uv.dist)) > - RDCOST(x->rdmult, 0, (rd_stats_y.sse + rd_stats_uv.sse))) { - skip_blk = 1; - rd_stats_y.rate = x->skip_cost[av1_get_skip_context(xd)][1]; - rd_stats_uv.rate = 0; - rd_stats_y.dist = rd_stats_y.sse; - rd_stats_uv.dist = rd_stats_uv.sse; - } else { - skip_blk = 0; - rd_stats_y.rate += x->skip_cost[av1_get_skip_context(xd)][0]; - } - - if (RDCOST(x->rdmult, best_rate_y + best_rate_uv, rd_cost->dist) > - RDCOST(x->rdmult, rd_stats_y.rate + rd_stats_uv.rate, - (rd_stats_y.dist + rd_stats_uv.dist))) { - best_mbmode->tx_size = mbmi->tx_size; - av1_copy(best_mbmode->inter_tx_size, mbmi->inter_tx_size); - memcpy(ctx->blk_skip, 
x->blk_skip, - sizeof(x->blk_skip[0]) * ctx->num_4x4_blk); - av1_copy(best_mbmode->txk_type, mbmi->txk_type); - rd_cost->rate += - (rd_stats_y.rate + rd_stats_uv.rate - best_rate_y - best_rate_uv); - rd_cost->dist = rd_stats_y.dist + rd_stats_uv.dist; - rd_cost->rdcost = RDCOST(x->rdmult, rd_cost->rate, rd_cost->dist); - *best_skip2 = skip_blk; - } - } -} - -// Please add/modify parameter setting in this function, making it consistent -// and easy to read and maintain. -static void set_params_rd_pick_inter_mode( - const AV1_COMP *cpi, MACROBLOCK *x, HandleInterModeArgs *args, - BLOCK_SIZE bsize, int mi_row, int mi_col, uint16_t ref_frame_skip_mask[2], - uint32_t mode_skip_mask[REF_FRAMES], int skip_ref_frame_mask, - unsigned int ref_costs_single[REF_FRAMES], - unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES], - struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]) { - const AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; - const struct segmentation *const seg = &cm->seg; - const SPEED_FEATURES *const sf = &cpi->sf; - unsigned char segment_id = mbmi->segment_id; - int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; - int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1, - MAX_SB_SIZE >> 1 }; - int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1, - MAX_SB_SIZE >> 1 }; - int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; - - for (int i = 0; i < MB_MODE_COUNT; ++i) - for (int k = 0; k < REF_FRAMES; ++k) args->single_filter[i][k] = SWITCHABLE; - - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - int len = sizeof(uint16_t); - args->above_pred_buf[0] = CONVERT_TO_BYTEPTR(x->above_pred_buf); - args->above_pred_buf[1] = - CONVERT_TO_BYTEPTR(x->above_pred_buf + (MAX_SB_SQUARE >> 1) * len); - args->above_pred_buf[2] = - 
CONVERT_TO_BYTEPTR(x->above_pred_buf + MAX_SB_SQUARE * len); - args->left_pred_buf[0] = CONVERT_TO_BYTEPTR(x->left_pred_buf); - args->left_pred_buf[1] = - CONVERT_TO_BYTEPTR(x->left_pred_buf + (MAX_SB_SQUARE >> 1) * len); - args->left_pred_buf[2] = - CONVERT_TO_BYTEPTR(x->left_pred_buf + MAX_SB_SQUARE * len); - } else { - args->above_pred_buf[0] = x->above_pred_buf; - args->above_pred_buf[1] = x->above_pred_buf + (MAX_SB_SQUARE >> 1); - args->above_pred_buf[2] = x->above_pred_buf + MAX_SB_SQUARE; - args->left_pred_buf[0] = x->left_pred_buf; - args->left_pred_buf[1] = x->left_pred_buf + (MAX_SB_SQUARE >> 1); - args->left_pred_buf[2] = x->left_pred_buf + MAX_SB_SQUARE; - } - - av1_collect_neighbors_ref_counts(xd); - - estimate_ref_frame_costs(cm, xd, x, segment_id, ref_costs_single, - ref_costs_comp); - - MV_REFERENCE_FRAME ref_frame; - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { - x->pred_mv_sad[ref_frame] = INT_MAX; - x->mbmi_ext->mode_context[ref_frame] = 0; - x->mbmi_ext->compound_mode_context[ref_frame] = 0; - mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX; - if (cpi->ref_frame_flags & ref_frame_flag_list[ref_frame]) { - if (mbmi->partition != PARTITION_NONE && - mbmi->partition != PARTITION_SPLIT) { - if (skip_ref_frame_mask & (1 << ref_frame)) { - int skip = 1; - for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) { - if (!(skip_ref_frame_mask & (1 << r))) { - const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES]; - if (rf[0] == ref_frame || rf[1] == ref_frame) { - skip = 0; - break; - } - } - } - if (skip) continue; - } - } - assert(get_ref_frame_buffer(cpi, ref_frame) != NULL); - setup_buffer_ref_mvs_inter(cpi, x, ref_frame, bsize, mi_row, mi_col, - yv12_mb); - } - } - // ref_frame = ALTREF_FRAME - for (; ref_frame < MODE_CTX_REF_FRAMES; ++ref_frame) { - x->mbmi_ext->mode_context[ref_frame] = 0; - mbmi_ext->ref_mv_count[ref_frame] = UINT8_MAX; - const MV_REFERENCE_FRAME *rf = ref_frame_map[ref_frame - REF_FRAMES]; - 
if (!((cpi->ref_frame_flags & ref_frame_flag_list[rf[0]]) && - (cpi->ref_frame_flags & ref_frame_flag_list[rf[1]]))) { - continue; - } - - if (mbmi->partition != PARTITION_NONE && - mbmi->partition != PARTITION_SPLIT) { - if (skip_ref_frame_mask & (1 << ref_frame)) { - continue; - } - } - av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count, - mbmi_ext->ref_mv_stack, NULL, mbmi_ext->global_mvs, mi_row, - mi_col, mbmi_ext->mode_context); - } - - av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col); - - if (check_num_overlappable_neighbors(mbmi) && - is_motion_variation_allowed_bsize(bsize)) { - av1_build_prediction_by_above_preds(cm, xd, mi_row, mi_col, - args->above_pred_buf, dst_width1, - dst_height1, args->above_pred_stride); - av1_build_prediction_by_left_preds(cm, xd, mi_row, mi_col, - args->left_pred_buf, dst_width2, - dst_height2, args->left_pred_stride); - av1_setup_dst_planes(xd->plane, bsize, get_frame_new_buffer(cm), mi_row, - mi_col, 0, num_planes); - calc_target_weighted_pred( - cm, x, xd, mi_row, mi_col, args->above_pred_buf[0], - args->above_pred_stride[0], args->left_pred_buf[0], - args->left_pred_stride[0]); - } - - int min_pred_mv_sad = INT_MAX; - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) - min_pred_mv_sad = AOMMIN(min_pred_mv_sad, x->pred_mv_sad[ref_frame]); - - for (int i = 0; i < 2; ++i) { - ref_frame_skip_mask[i] = 0; - } - memset(mode_skip_mask, 0, REF_FRAMES * sizeof(*mode_skip_mask)); - for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { - if (!(cpi->ref_frame_flags & ref_frame_flag_list[ref_frame])) { - // Skip checking missing references in both single and compound reference - // modes. Note that a mode will be skipped iff both reference frames - // are masked out. 
- ref_frame_skip_mask[0] |= (1 << ref_frame); - ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; - } else { - // Skip fixed mv modes for poor references - if ((x->pred_mv_sad[ref_frame] >> 2) > min_pred_mv_sad) { - mode_skip_mask[ref_frame] |= INTER_NEAREST_NEAR_ZERO; - } - } - // If the segment reference frame feature is enabled.... - // then do nothing if the current ref frame is not allowed.. - if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) && - get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) { - ref_frame_skip_mask[0] |= (1 << ref_frame); - ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; - } - } - - // Disable this drop out case if the ref frame - // segment level feature is enabled for this segment. This is to - // prevent the possibility that we end up unable to pick any mode. - if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) { - // Only consider GLOBALMV/ALTREF_FRAME for alt ref frame, - // unless ARNR filtering is enabled in which case we want - // an unfiltered alternative. We allow near/nearest as well - // because they may result in zero-zero MVs but be cheaper. - if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) { - ref_frame_skip_mask[0] = (1 << LAST_FRAME) | (1 << LAST2_FRAME) | - (1 << LAST3_FRAME) | (1 << BWDREF_FRAME) | - (1 << ALTREF2_FRAME) | (1 << GOLDEN_FRAME); - ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK; - // TODO(zoeliu): To further explore whether following needs to be done for - // BWDREF_FRAME as well. 
- mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO; - const MV_REFERENCE_FRAME tmp_ref_frames[2] = { ALTREF_FRAME, NONE_FRAME }; - int_mv near_mv, nearest_mv, global_mv; - get_this_mv(&nearest_mv, NEARESTMV, 0, 0, tmp_ref_frames, x->mbmi_ext); - get_this_mv(&near_mv, NEARMV, 0, 0, tmp_ref_frames, x->mbmi_ext); - get_this_mv(&global_mv, GLOBALMV, 0, 0, tmp_ref_frames, x->mbmi_ext); - - if (near_mv.as_int != global_mv.as_int) - mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV); - if (nearest_mv.as_int != global_mv.as_int) - mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV); - } - } - - if (cpi->rc.is_src_frame_alt_ref) { - if (sf->alt_ref_search_fp) { - assert(cpi->ref_frame_flags & ref_frame_flag_list[ALTREF_FRAME]); - mode_skip_mask[ALTREF_FRAME] = 0; - ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME); - ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK; - } - } - - if (sf->alt_ref_search_fp) - if (!cm->show_frame && x->pred_mv_sad[GOLDEN_FRAME] < INT_MAX) - if (x->pred_mv_sad[ALTREF_FRAME] > (x->pred_mv_sad[GOLDEN_FRAME] << 1)) - mode_skip_mask[ALTREF_FRAME] |= INTER_ALL; - - if (sf->adaptive_mode_search) { - if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref && - cpi->rc.frames_since_golden >= 3) - if ((x->pred_mv_sad[GOLDEN_FRAME] >> 1) > x->pred_mv_sad[LAST_FRAME]) - mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL; - } - - if (bsize > sf->max_intra_bsize) { - ref_frame_skip_mask[0] |= (1 << INTRA_FRAME); - ref_frame_skip_mask[1] |= (1 << INTRA_FRAME); - } - - mode_skip_mask[INTRA_FRAME] |= - ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]); - - if (cpi->sf.tx_type_search.fast_intra_tx_type_search) - x->use_default_intra_tx_type = 1; - else - x->use_default_intra_tx_type = 0; - - if (cpi->sf.tx_type_search.fast_inter_tx_type_search) - x->use_default_inter_tx_type = 1; - else - x->use_default_inter_tx_type = 0; - if (cpi->sf.skip_repeat_interpolation_filter_search) { - x->interp_filter_stats_idx[0] = 0; - x->interp_filter_stats_idx[1] = 0; - } -} - -static void 
search_palette_mode(const AV1_COMP *cpi, MACROBLOCK *x, int mi_row, - int mi_col, RD_STATS *rd_cost, - PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize, - MB_MODE_INFO *const mbmi, - PALETTE_MODE_INFO *const pmi, - unsigned int *ref_costs_single, - InterModeSearchState *search_state) { - const AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - MACROBLOCKD *const xd = &x->e_mbd; - int rate2 = 0; - int64_t distortion2 = 0, best_rd_palette = search_state->best_rd, this_rd, - best_model_rd_palette = INT64_MAX; - int skippable = 0, rate_overhead_palette = 0; - RD_STATS rd_stats_y; - TX_SIZE uv_tx = TX_4X4; - uint8_t *const best_palette_color_map = - x->palette_buffer->best_palette_color_map; - uint8_t *const color_map = xd->plane[0].color_index_map; - MB_MODE_INFO best_mbmi_palette = *mbmi; - uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE]; - const int *const intra_mode_cost = x->mbmode_cost[size_group_lookup[bsize]]; - const int rows = block_size_high[bsize]; - const int cols = block_size_wide[bsize]; - - mbmi->mode = DC_PRED; - mbmi->uv_mode = UV_DC_PRED; - mbmi->ref_frame[0] = INTRA_FRAME; - mbmi->ref_frame[1] = NONE_FRAME; - rate_overhead_palette = rd_pick_palette_intra_sby( - cpi, x, bsize, mi_row, mi_col, intra_mode_cost[DC_PRED], - &best_mbmi_palette, best_palette_color_map, &best_rd_palette, - &best_model_rd_palette, NULL, NULL, NULL, NULL, ctx, best_blk_skip); - if (pmi->palette_size[0] == 0) return; - - memcpy(x->blk_skip, best_blk_skip, - sizeof(best_blk_skip[0]) * bsize_to_num_blk(bsize)); - - memcpy(color_map, best_palette_color_map, - rows * cols * sizeof(best_palette_color_map[0])); - super_block_yrd(cpi, x, &rd_stats_y, bsize, search_state->best_rd); - if (rd_stats_y.rate == INT_MAX) return; - - skippable = rd_stats_y.skip; - distortion2 = rd_stats_y.dist; - rate2 = rd_stats_y.rate + rate_overhead_palette; - rate2 += ref_costs_single[INTRA_FRAME]; - if (num_planes > 1) { - uv_tx = av1_get_tx_size(AOM_PLANE_U, xd); - if 
(search_state->rate_uv_intra[uv_tx] == INT_MAX) { - choose_intra_uv_mode( - cpi, x, bsize, uv_tx, &search_state->rate_uv_intra[uv_tx], - &search_state->rate_uv_tokenonly[uv_tx], - &search_state->dist_uvs[uv_tx], &search_state->skip_uvs[uv_tx], - &search_state->mode_uv[uv_tx]); - search_state->pmi_uv[uv_tx] = *pmi; - search_state->uv_angle_delta[uv_tx] = mbmi->angle_delta[PLANE_TYPE_UV]; - } - mbmi->uv_mode = search_state->mode_uv[uv_tx]; - pmi->palette_size[1] = search_state->pmi_uv[uv_tx].palette_size[1]; - if (pmi->palette_size[1] > 0) { - memcpy(pmi->palette_colors + PALETTE_MAX_SIZE, - search_state->pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE, - 2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0])); - } - mbmi->angle_delta[PLANE_TYPE_UV] = search_state->uv_angle_delta[uv_tx]; - skippable = skippable && search_state->skip_uvs[uv_tx]; - distortion2 += search_state->dist_uvs[uv_tx]; - rate2 += search_state->rate_uv_intra[uv_tx]; - } - - if (skippable) { - rate2 -= rd_stats_y.rate; - if (num_planes > 1) rate2 -= search_state->rate_uv_tokenonly[uv_tx]; - rate2 += x->skip_cost[av1_get_skip_context(xd)][1]; - } else { - rate2 += x->skip_cost[av1_get_skip_context(xd)][0]; - } - this_rd = RDCOST(x->rdmult, rate2, distortion2); - if (this_rd < search_state->best_rd) { - search_state->best_mode_index = 3; - mbmi->mv[0].as_int = 0; - rd_cost->rate = rate2; - rd_cost->dist = distortion2; - rd_cost->rdcost = this_rd; - search_state->best_rd = this_rd; - search_state->best_mbmode = *mbmi; - search_state->best_skip2 = 0; - search_state->best_mode_skippable = skippable; - memcpy(ctx->blk_skip, x->blk_skip, - sizeof(x->blk_skip[0]) * ctx->num_4x4_blk); - } -} - -static void init_inter_mode_search_state(InterModeSearchState *search_state, - const AV1_COMP *cpi, - const TileDataEnc *tile_data, - const MACROBLOCK *x, BLOCK_SIZE bsize, - int64_t best_rd_so_far) { - search_state->best_rd = best_rd_so_far; - - av1_zero(search_state->best_mbmode); - - search_state->best_rate_y = 
INT_MAX; - - search_state->best_rate_uv = INT_MAX; - - search_state->best_mode_skippable = 0; - - search_state->best_skip2 = 0; - - search_state->best_mode_index = -1; - - const MACROBLOCKD *const xd = &x->e_mbd; - const MB_MODE_INFO *const mbmi = xd->mi[0]; - const unsigned char segment_id = mbmi->segment_id; - - search_state->skip_intra_modes = 0; - - search_state->num_available_refs = 0; - memset(search_state->dist_refs, -1, sizeof(search_state->dist_refs)); - memset(search_state->dist_order_refs, -1, - sizeof(search_state->dist_order_refs)); - - for (int i = 0; i <= LAST_NEW_MV_INDEX; ++i) - search_state->mode_threshold[i] = 0; - const int *const rd_threshes = cpi->rd.threshes[segment_id][bsize]; - for (int i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i) - search_state->mode_threshold[i] = - ((int64_t)rd_threshes[i] * tile_data->thresh_freq_fact[bsize][i]) >> 5; - - search_state->best_intra_mode = DC_PRED; - search_state->best_intra_rd = INT64_MAX; - - search_state->angle_stats_ready = 0; - - search_state->best_pred_sse = UINT_MAX; - - for (int i = 0; i < TX_SIZES_ALL; i++) - search_state->rate_uv_intra[i] = INT_MAX; - - av1_zero(search_state->pmi_uv); - - for (int i = 0; i < REFERENCE_MODES; ++i) - search_state->best_pred_rd[i] = INT64_MAX; - - av1_zero(search_state->single_newmv); - av1_zero(search_state->single_newmv_rate); - av1_zero(search_state->single_newmv_valid); - for (int i = 0; i < MB_MODE_COUNT; ++i) { - for (int j = 0; j < MAX_REF_MV_SERCH; ++j) { - for (int ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) { - search_state->modelled_rd[i][j][ref_frame] = INT64_MAX; - search_state->simple_rd[i][j][ref_frame] = INT64_MAX; - } - } - } - - for (int dir = 0; dir < 2; ++dir) { - for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) { - for (int ref_frame = 0; ref_frame < FWD_REFS; ++ref_frame) { - SingleInterModeState *state; - - state = &search_state->single_state[dir][mode][ref_frame]; - state->ref_frame = NONE_FRAME; - state->rd = INT64_MAX; - - 
state = &search_state->single_state_modelled[dir][mode][ref_frame]; - state->ref_frame = NONE_FRAME; - state->rd = INT64_MAX; - } - } - } - for (int dir = 0; dir < 2; ++dir) { - for (int mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) { - for (int ref_frame = 0; ref_frame < FWD_REFS; ++ref_frame) { - search_state->single_rd_order[dir][mode][ref_frame] = NONE_FRAME; - } - } - } - av1_zero(search_state->single_state_cnt); - av1_zero(search_state->single_state_modelled_cnt); -} - -// Case 1: return 0, means don't skip this mode -// Case 2: return 1, means skip this mode completely -// Case 3: return 2, means skip compound only, but still try single motion modes -static int inter_mode_search_order_independent_skip( - const AV1_COMP *cpi, const PICK_MODE_CONTEXT *ctx, const MACROBLOCK *x, - BLOCK_SIZE bsize, int mode_index, int mi_row, int mi_col, - uint32_t *mode_skip_mask, uint16_t *ref_frame_skip_mask, - InterModeSearchState *search_state) { - const SPEED_FEATURES *const sf = &cpi->sf; - const AV1_COMMON *const cm = &cpi->common; - const struct segmentation *const seg = &cm->seg; - const MACROBLOCKD *const xd = &x->e_mbd; - const MB_MODE_INFO *const mbmi = xd->mi[0]; - const unsigned char segment_id = mbmi->segment_id; - const MV_REFERENCE_FRAME *ref_frame = av1_mode_order[mode_index].ref_frame; - const PREDICTION_MODE this_mode = av1_mode_order[mode_index].mode; - int skip_motion_mode = 0; - if (mbmi->partition != PARTITION_NONE && mbmi->partition != PARTITION_SPLIT) { - const int ref_type = av1_ref_frame_type(ref_frame); - int skip_ref = ctx->skip_ref_frame_mask & (1 << ref_type); - if (ref_type <= ALTREF_FRAME && skip_ref) { - // Since the compound ref modes depends on the motion estimation result of - // two single ref modes( best mv of single ref modes as the start point ) - // If current single ref mode is marked skip, we need to check if it will - // be used in compound ref modes. 
- for (int r = ALTREF_FRAME + 1; r < MODE_CTX_REF_FRAMES; ++r) { - if (!(ctx->skip_ref_frame_mask & (1 << r))) { - const MV_REFERENCE_FRAME *rf = ref_frame_map[r - REF_FRAMES]; - if (rf[0] == ref_type || rf[1] == ref_type) { - // Found a not skipped compound ref mode which contains current - // single ref. So this single ref can't be skipped completly - // Just skip it's motion mode search, still try it's simple - // transition mode. - skip_motion_mode = 1; - skip_ref = 0; - break; - } - } - } - } - if (skip_ref) return 1; - } - - if (cpi->sf.mode_pruning_based_on_two_pass_partition_search && - !x->cb_partition_scan) { - const int mi_width = mi_size_wide[bsize]; - const int mi_height = mi_size_high[bsize]; - int found = 0; - // Search in the stats table to see if the ref frames have been used in the - // first pass of partition search. - for (int row = mi_row; row < mi_row + mi_width && !found; - row += FIRST_PARTITION_PASS_SAMPLE_REGION) { - for (int col = mi_col; col < mi_col + mi_height && !found; - col += FIRST_PARTITION_PASS_SAMPLE_REGION) { - const int index = av1_first_partition_pass_stats_index(row, col); - const FIRST_PARTITION_PASS_STATS *const stats = - &x->first_partition_pass_stats[index]; - if (stats->ref0_counts[ref_frame[0]] && - (ref_frame[1] < 0 || stats->ref1_counts[ref_frame[1]])) { - found = 1; - break; - } - } - } - if (!found) return 1; - } - - if (ref_frame[0] > INTRA_FRAME && ref_frame[1] == INTRA_FRAME) { - // Mode must by compatible - if (!is_interintra_allowed_mode(this_mode)) return 1; - if (!is_interintra_allowed_bsize(bsize)) return 1; - } - - // This is only used in motion vector unit test. 
- if (cpi->oxcf.motion_vector_unit_test && ref_frame[0] == INTRA_FRAME) - return 1; - - if (ref_frame[0] == INTRA_FRAME) { - if (this_mode != DC_PRED) { - // Disable intra modes other than DC_PRED for blocks with low variance - // Threshold for intra skipping based on source variance - // TODO(debargha): Specialize the threshold for super block sizes - const unsigned int skip_intra_var_thresh = 64; - if ((sf->mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) && - x->source_variance < skip_intra_var_thresh) - return 1; - } - } else { - if (!is_comp_ref_allowed(bsize) && ref_frame[1] > INTRA_FRAME) return 1; - } - - const int comp_pred = ref_frame[1] > INTRA_FRAME; - if (comp_pred) { - if (!cpi->allow_comp_inter_inter) return 1; - - if (cm->reference_mode == SINGLE_REFERENCE) return 1; - - // Skip compound inter modes if ARF is not available. - if (!(cpi->ref_frame_flags & ref_frame_flag_list[ref_frame[1]])) return 1; - - // Do not allow compound prediction if the segment level reference frame - // feature is in use as in this case there can only be one reference. 
- if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) return 1; - } - - if (sf->selective_ref_frame) { - if (sf->selective_ref_frame >= 2 || x->cb_partition_scan) { - if (ref_frame[0] == ALTREF2_FRAME || ref_frame[1] == ALTREF2_FRAME) - if (get_relative_dist( - cm, cm->cur_frame->ref_frame_offset[ALTREF2_FRAME - LAST_FRAME], - cm->frame_offset) < 0) - return 1; - if (ref_frame[0] == BWDREF_FRAME || ref_frame[1] == BWDREF_FRAME) - if (get_relative_dist( - cm, cm->cur_frame->ref_frame_offset[BWDREF_FRAME - LAST_FRAME], - cm->frame_offset) < 0) - return 1; - } - if (ref_frame[0] == LAST3_FRAME || ref_frame[1] == LAST3_FRAME) - if (get_relative_dist( - cm, cm->cur_frame->ref_frame_offset[LAST3_FRAME - LAST_FRAME], - cm->cur_frame->ref_frame_offset[GOLDEN_FRAME - LAST_FRAME]) <= 0) - return 1; - if (ref_frame[0] == LAST2_FRAME || ref_frame[1] == LAST2_FRAME) - if (get_relative_dist( - cm, cm->cur_frame->ref_frame_offset[LAST2_FRAME - LAST_FRAME], - cm->cur_frame->ref_frame_offset[GOLDEN_FRAME - LAST_FRAME]) <= 0) - return 1; - } - - // One-sided compound is used only when all reference frames are one-sided. 
- if (sf->selective_ref_frame && comp_pred && !cpi->all_one_sided_refs) { - unsigned int ref_offsets[2]; - for (int i = 0; i < 2; ++i) { - const int buf_idx = cm->frame_refs[ref_frame[i] - LAST_FRAME].idx; - assert(buf_idx >= 0); - ref_offsets[i] = cm->buffer_pool->frame_bufs[buf_idx].cur_frame_offset; - } - if ((get_relative_dist(cm, ref_offsets[0], cm->frame_offset) <= 0 && - get_relative_dist(cm, ref_offsets[1], cm->frame_offset) <= 0) || - (get_relative_dist(cm, ref_offsets[0], cm->frame_offset) > 0 && - get_relative_dist(cm, ref_offsets[1], cm->frame_offset) > 0)) - return 1; - } - - if (mode_skip_mask[ref_frame[0]] & (1 << this_mode)) { - return 1; - } - - if ((ref_frame_skip_mask[0] & (1 << ref_frame[0])) && - (ref_frame_skip_mask[1] & (1 << AOMMAX(0, ref_frame[1])))) { - return 1; - } - - if (skip_repeated_mv(cm, x, this_mode, ref_frame, search_state)) { - return 1; - } - if (skip_motion_mode) { - return 2; - } - return 0; -} - -static INLINE void init_mbmi(MB_MODE_INFO *mbmi, int mode_index, - const AV1_COMMON *cm) { - PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; - PREDICTION_MODE this_mode = av1_mode_order[mode_index].mode; - mbmi->ref_mv_idx = 0; - mbmi->mode = this_mode; - mbmi->uv_mode = UV_DC_PRED; - mbmi->ref_frame[0] = av1_mode_order[mode_index].ref_frame[0]; - mbmi->ref_frame[1] = av1_mode_order[mode_index].ref_frame[1]; - pmi->palette_size[0] = 0; - pmi->palette_size[1] = 0; - mbmi->filter_intra_mode_info.use_filter_intra = 0; - mbmi->mv[0].as_int = mbmi->mv[1].as_int = 0; - mbmi->motion_mode = SIMPLE_TRANSLATION; - mbmi->interintra_mode = (INTERINTRA_MODE)(II_DC_PRED - 1); - set_default_interp_filters(mbmi, cm->interp_filter); -} - -static int64_t handle_intra_mode(InterModeSearchState *search_state, - const AV1_COMP *cpi, MACROBLOCK *x, - BLOCK_SIZE bsize, int mi_row, int mi_col, - int ref_frame_cost, - const PICK_MODE_CONTEXT *ctx, int disable_skip, - RD_STATS *rd_stats, RD_STATS *rd_stats_y, - RD_STATS *rd_stats_uv) { - const 
AV1_COMMON *cm = &cpi->common; - const SPEED_FEATURES *const sf = &cpi->sf; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - assert(mbmi->ref_frame[0] == INTRA_FRAME); - PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; - const int try_palette = - av1_allow_palette(cm->allow_screen_content_tools, mbmi->sb_type); - const int *const intra_mode_cost = x->mbmode_cost[size_group_lookup[bsize]]; - const int intra_cost_penalty = av1_get_intra_cost_penalty( - cm->base_qindex, cm->y_dc_delta_q, cm->seq_params.bit_depth); - const int rows = block_size_high[bsize]; - const int cols = block_size_wide[bsize]; - const int num_planes = av1_num_planes(cm); - const int skip_ctx = av1_get_skip_context(xd); - - int known_rate = intra_mode_cost[mbmi->mode]; - known_rate += ref_frame_cost; - if (mbmi->mode != DC_PRED && mbmi->mode != PAETH_PRED) - known_rate += intra_cost_penalty; - known_rate += AOMMIN(x->skip_cost[skip_ctx][0], x->skip_cost[skip_ctx][1]); - const int64_t known_rd = RDCOST(x->rdmult, known_rate, 0); - if (known_rd > search_state->best_rd) { - search_state->skip_intra_modes = 1; - return INT64_MAX; - } - - TX_SIZE uv_tx; - int is_directional_mode = av1_is_directional_mode(mbmi->mode); - if (is_directional_mode && av1_use_angle_delta(bsize)) { - int rate_dummy; - int64_t model_rd = INT64_MAX; - if (!search_state->angle_stats_ready) { - const int src_stride = x->plane[0].src.stride; - const uint8_t *src = x->plane[0].src.buf; - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) - highbd_angle_estimation(src, src_stride, rows, cols, bsize, - search_state->directional_mode_skip_mask); - else - angle_estimation(src, src_stride, rows, cols, bsize, - search_state->directional_mode_skip_mask); - search_state->angle_stats_ready = 1; - } - if (search_state->directional_mode_skip_mask[mbmi->mode]) return INT64_MAX; - av1_init_rd_stats(rd_stats_y); - rd_stats_y->rate = INT_MAX; - rd_pick_intra_angle_sby(cpi, x, mi_row, mi_col, &rate_dummy, 
rd_stats_y, - bsize, intra_mode_cost[mbmi->mode], - search_state->best_rd, &model_rd); - } else { - av1_init_rd_stats(rd_stats_y); - mbmi->angle_delta[PLANE_TYPE_Y] = 0; - super_block_yrd(cpi, x, rd_stats_y, bsize, search_state->best_rd); - } - uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE]; - memcpy(best_blk_skip, x->blk_skip, - sizeof(best_blk_skip[0]) * ctx->num_4x4_blk); - int try_filter_intra = 0; - int64_t best_rd_tmp = INT64_MAX; - if (mbmi->mode == DC_PRED && av1_filter_intra_allowed_bsize(cm, bsize)) { - if (rd_stats_y->rate != INT_MAX) { - const int tmp_rate = rd_stats_y->rate + x->filter_intra_cost[bsize][0] + - intra_mode_cost[mbmi->mode]; - best_rd_tmp = RDCOST(x->rdmult, tmp_rate, rd_stats_y->dist); - try_filter_intra = !((best_rd_tmp / 2) > search_state->best_rd); - } else { - try_filter_intra = !(search_state->best_mbmode.skip); - } - } - if (try_filter_intra) { - RD_STATS rd_stats_y_fi; - int filter_intra_selected_flag = 0; - TX_SIZE best_tx_size = mbmi->tx_size; - TX_TYPE best_txk_type[TXK_TYPE_BUF_LEN]; - memcpy(best_txk_type, mbmi->txk_type, - sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN); - FILTER_INTRA_MODE best_fi_mode = FILTER_DC_PRED; - - mbmi->filter_intra_mode_info.use_filter_intra = 1; - for (FILTER_INTRA_MODE fi_mode = FILTER_DC_PRED; - fi_mode < FILTER_INTRA_MODES; ++fi_mode) { - int64_t this_rd_tmp; - mbmi->filter_intra_mode_info.filter_intra_mode = fi_mode; - super_block_yrd(cpi, x, &rd_stats_y_fi, bsize, search_state->best_rd); - if (rd_stats_y_fi.rate == INT_MAX) { - continue; - } - const int this_rate_tmp = - rd_stats_y_fi.rate + - intra_mode_info_cost_y(cpi, x, mbmi, bsize, - intra_mode_cost[mbmi->mode]); - this_rd_tmp = RDCOST(x->rdmult, this_rate_tmp, rd_stats_y_fi.dist); - - if (this_rd_tmp != INT64_MAX && this_rd_tmp / 2 > search_state->best_rd) { - break; - } - if (this_rd_tmp < best_rd_tmp) { - best_tx_size = mbmi->tx_size; - memcpy(best_txk_type, mbmi->txk_type, - sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN); - 
memcpy(best_blk_skip, x->blk_skip, - sizeof(best_blk_skip[0]) * ctx->num_4x4_blk); - best_fi_mode = fi_mode; - *rd_stats_y = rd_stats_y_fi; - filter_intra_selected_flag = 1; - best_rd_tmp = this_rd_tmp; - } - } - - mbmi->tx_size = best_tx_size; - memcpy(mbmi->txk_type, best_txk_type, - sizeof(*best_txk_type) * TXK_TYPE_BUF_LEN); - memcpy(x->blk_skip, best_blk_skip, - sizeof(x->blk_skip[0]) * ctx->num_4x4_blk); - - if (filter_intra_selected_flag) { - mbmi->filter_intra_mode_info.use_filter_intra = 1; - mbmi->filter_intra_mode_info.filter_intra_mode = best_fi_mode; - } else { - mbmi->filter_intra_mode_info.use_filter_intra = 0; - } - } - if (rd_stats_y->rate == INT_MAX) return INT64_MAX; - const int mode_cost_y = - intra_mode_info_cost_y(cpi, x, mbmi, bsize, intra_mode_cost[mbmi->mode]); - av1_init_rd_stats(rd_stats); - av1_init_rd_stats(rd_stats_uv); - if (num_planes > 1) { - uv_tx = av1_get_tx_size(AOM_PLANE_U, xd); - if (search_state->rate_uv_intra[uv_tx] == INT_MAX) { - int rate_y = - rd_stats_y->skip ? 
x->skip_cost[skip_ctx][1] : rd_stats_y->rate; - const int64_t rdy = - RDCOST(x->rdmult, rate_y + mode_cost_y, rd_stats_y->dist); - if (search_state->best_rd < (INT64_MAX / 2) && - rdy > (search_state->best_rd + (search_state->best_rd >> 2))) { - search_state->skip_intra_modes = 1; - return INT64_MAX; - } - choose_intra_uv_mode( - cpi, x, bsize, uv_tx, &search_state->rate_uv_intra[uv_tx], - &search_state->rate_uv_tokenonly[uv_tx], - &search_state->dist_uvs[uv_tx], &search_state->skip_uvs[uv_tx], - &search_state->mode_uv[uv_tx]); - if (try_palette) search_state->pmi_uv[uv_tx] = *pmi; - search_state->uv_angle_delta[uv_tx] = mbmi->angle_delta[PLANE_TYPE_UV]; - - const int uv_rate = search_state->rate_uv_tokenonly[uv_tx]; - const int64_t uv_dist = search_state->dist_uvs[uv_tx]; - const int64_t uv_rd = RDCOST(x->rdmult, uv_rate, uv_dist); - if (uv_rd > search_state->best_rd) { - search_state->skip_intra_modes = 1; - return INT64_MAX; - } - } - - rd_stats_uv->rate = search_state->rate_uv_tokenonly[uv_tx]; - rd_stats_uv->dist = search_state->dist_uvs[uv_tx]; - rd_stats_uv->skip = search_state->skip_uvs[uv_tx]; - rd_stats->skip = rd_stats_y->skip && rd_stats_uv->skip; - mbmi->uv_mode = search_state->mode_uv[uv_tx]; - if (try_palette) { - pmi->palette_size[1] = search_state->pmi_uv[uv_tx].palette_size[1]; - memcpy(pmi->palette_colors + PALETTE_MAX_SIZE, - search_state->pmi_uv[uv_tx].palette_colors + PALETTE_MAX_SIZE, - 2 * PALETTE_MAX_SIZE * sizeof(pmi->palette_colors[0])); - } - mbmi->angle_delta[PLANE_TYPE_UV] = search_state->uv_angle_delta[uv_tx]; - } - rd_stats->rate = rd_stats_y->rate + mode_cost_y; - if (!xd->lossless[mbmi->segment_id] && block_signals_txsize(bsize)) { - // super_block_yrd above includes the cost of the tx_size in the - // tokenonly rate, but for intra blocks, tx_size is always coded - // (prediction granularity), so we account for it in the full rate, - // not the tokenonly rate. 
- rd_stats_y->rate -= tx_size_cost(cm, x, bsize, mbmi->tx_size); - } - if (num_planes > 1 && !x->skip_chroma_rd) { - const int uv_mode_cost = - x->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][mbmi->uv_mode]; - rd_stats->rate += - rd_stats_uv->rate + - intra_mode_info_cost_uv(cpi, x, mbmi, bsize, uv_mode_cost); - } - if (mbmi->mode != DC_PRED && mbmi->mode != PAETH_PRED) - rd_stats->rate += intra_cost_penalty; - rd_stats->dist = rd_stats_y->dist + rd_stats_uv->dist; - - // Estimate the reference frame signaling cost and add it - // to the rolling cost variable. - rd_stats->rate += ref_frame_cost; - if (rd_stats->skip) { - // Back out the coefficient coding costs - rd_stats->rate -= (rd_stats_y->rate + rd_stats_uv->rate); - rd_stats_y->rate = 0; - rd_stats_uv->rate = 0; - // Cost the skip mb case - rd_stats->rate += x->skip_cost[skip_ctx][1]; - } else { - // Add in the cost of the no skip flag. - rd_stats->rate += x->skip_cost[skip_ctx][0]; - } - // Calculate the final RD estimate for this mode. - const int64_t this_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist); - // Keep record of best intra rd - if (this_rd < search_state->best_intra_rd) { - search_state->best_intra_rd = this_rd; - search_state->best_intra_mode = mbmi->mode; - } - - if (sf->skip_intra_in_interframe) { - if (search_state->best_rd < (INT64_MAX / 2) && - this_rd > (search_state->best_rd + (search_state->best_rd >> 1))) - search_state->skip_intra_modes = 1; - } - - if (!disable_skip) { - for (int i = 0; i < REFERENCE_MODES; ++i) - search_state->best_pred_rd[i] = - AOMMIN(search_state->best_pred_rd[i], this_rd); - } - return this_rd; -} - -static void collect_single_states(MACROBLOCK *x, - InterModeSearchState *search_state, - const MB_MODE_INFO *const mbmi) { - int i, j; - const MV_REFERENCE_FRAME ref_frame = mbmi->ref_frame[0]; - const PREDICTION_MODE this_mode = mbmi->mode; - const int dir = ref_frame <= GOLDEN_FRAME ? 
0 : 1; - const int mode_offset = INTER_OFFSET(this_mode); - const int ref_set = get_drl_refmv_count(x, mbmi->ref_frame, this_mode); - - // Simple rd - int64_t simple_rd = search_state->simple_rd[this_mode][0][ref_frame]; - for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) { - int64_t rd = search_state->simple_rd[this_mode][ref_mv_idx][ref_frame]; - if (rd < simple_rd) simple_rd = rd; - } - - // Insertion sort of single_state - SingleInterModeState this_state_s = { simple_rd, ref_frame, 1 }; - SingleInterModeState *state_s = search_state->single_state[dir][mode_offset]; - i = search_state->single_state_cnt[dir][mode_offset]; - for (j = i; j > 0 && state_s[j - 1].rd > this_state_s.rd; --j) - state_s[j] = state_s[j - 1]; - state_s[j] = this_state_s; - search_state->single_state_cnt[dir][mode_offset]++; - - // Modelled rd - int64_t modelled_rd = search_state->modelled_rd[this_mode][0][ref_frame]; - for (int ref_mv_idx = 1; ref_mv_idx < ref_set; ++ref_mv_idx) { - int64_t rd = search_state->modelled_rd[this_mode][ref_mv_idx][ref_frame]; - if (rd < modelled_rd) modelled_rd = rd; - } - - // Insertion sort of single_state_modelled - SingleInterModeState this_state_m = { modelled_rd, ref_frame, 1 }; - SingleInterModeState *state_m = - search_state->single_state_modelled[dir][mode_offset]; - i = search_state->single_state_modelled_cnt[dir][mode_offset]; - for (j = i; j > 0 && state_m[j - 1].rd > this_state_m.rd; --j) - state_m[j] = state_m[j - 1]; - state_m[j] = this_state_m; - search_state->single_state_modelled_cnt[dir][mode_offset]++; -} - -static void analyze_single_states(const AV1_COMP *cpi, - InterModeSearchState *search_state) { - int i, j, dir, mode; - if (cpi->sf.prune_comp_search_by_single_result >= 1) { - for (dir = 0; dir < 2; ++dir) { - int64_t best_rd; - SingleInterModeState(*state)[FWD_REFS]; - - // Use the best rd of GLOBALMV or NEWMV to prune the unlikely - // reference frames for all the modes (NEARESTMV and NEARMV may not - // have same motion 
vectors). Always keep the best of each mode - // because it might form the best possible combination with other mode. - state = search_state->single_state[dir]; - best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd, - state[INTER_OFFSET(GLOBALMV)][0].rd); - for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) { - for (i = 1; i < search_state->single_state_cnt[dir][mode]; ++i) { - if (state[mode][i].rd != INT64_MAX && - (state[mode][i].rd >> 1) > best_rd) { - state[mode][i].valid = 0; - } - } - } - - state = search_state->single_state_modelled[dir]; - best_rd = AOMMIN(state[INTER_OFFSET(NEWMV)][0].rd, - state[INTER_OFFSET(GLOBALMV)][0].rd); - for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) { - for (i = 1; i < search_state->single_state_modelled_cnt[dir][mode]; - ++i) { - if (state[mode][i].rd != INT64_MAX && - (state[mode][i].rd >> 1) > best_rd) { - state[mode][i].valid = 0; - } - } - } - } - } - - // Ordering by simple rd first, then by modelled rd - for (dir = 0; dir < 2; ++dir) { - for (mode = 0; mode < SINGLE_INTER_MODE_NUM; ++mode) { - const int state_cnt_s = search_state->single_state_cnt[dir][mode]; - const int state_cnt_m = - search_state->single_state_modelled_cnt[dir][mode]; - SingleInterModeState *state_s = search_state->single_state[dir][mode]; - SingleInterModeState *state_m = - search_state->single_state_modelled[dir][mode]; - int count = 0; - const int max_candidates = AOMMAX(state_cnt_s, state_cnt_m); - for (i = 0; i < state_cnt_s; ++i) { - if (state_s[i].rd == INT64_MAX) break; - if (state_s[i].valid) - search_state->single_rd_order[dir][mode][count++] = - state_s[i].ref_frame; - } - if (count < max_candidates) { - for (i = 0; i < state_cnt_m; ++i) { - if (state_m[i].rd == INT64_MAX) break; - if (state_m[i].valid) { - int ref_frame = state_m[i].ref_frame; - int match = 0; - // Check if existing already - for (j = 0; j < count; ++j) { - if (search_state->single_rd_order[dir][mode][j] == ref_frame) { - match = 1; - break; - } - } - if (!match) { - 
// Check if this ref_frame is removed in simple rd - int valid = 1; - for (j = 0; j < state_cnt_s; j++) { - if (ref_frame == state_s[j].ref_frame && !state_s[j].valid) { - valid = 0; - break; - } - } - if (valid) - search_state->single_rd_order[dir][mode][count++] = ref_frame; - } - if (count >= max_candidates) break; - } - } - } - } - } -} - -static int compound_skip_get_candidates( - const AV1_COMP *cpi, const InterModeSearchState *search_state, - const int dir, const PREDICTION_MODE mode) { - const int mode_offset = INTER_OFFSET(mode); - const SingleInterModeState *state = - search_state->single_state[dir][mode_offset]; - const SingleInterModeState *state_modelled = - search_state->single_state_modelled[dir][mode_offset]; - int max_candidates = 0; - int candidates; - - for (int i = 0; i < FWD_REFS; ++i) { - if (search_state->single_rd_order[dir][mode_offset][i] == NONE_FRAME) break; - max_candidates++; - } - - candidates = max_candidates; - if (cpi->sf.prune_comp_search_by_single_result >= 2) { - candidates = AOMMIN(2, max_candidates); - } - if (cpi->sf.prune_comp_search_by_single_result >= 3) { - if (state[0].rd != INT64_MAX && state_modelled[0].rd != INT64_MAX && - state[0].ref_frame == state_modelled[0].ref_frame) - candidates = 1; - if (mode == NEARMV || mode == GLOBALMV) candidates = 1; - } - return candidates; -} - -static int compound_skip_by_single_states( - const AV1_COMP *cpi, const InterModeSearchState *search_state, - const PREDICTION_MODE this_mode, const MV_REFERENCE_FRAME ref_frame, - const MV_REFERENCE_FRAME second_ref_frame, const MACROBLOCK *x) { - const MV_REFERENCE_FRAME refs[2] = { ref_frame, second_ref_frame }; - const int mode[2] = { compound_ref0_mode(this_mode), - compound_ref1_mode(this_mode) }; - const int mode_offset[2] = { INTER_OFFSET(mode[0]), INTER_OFFSET(mode[1]) }; - const int mode_dir[2] = { refs[0] <= GOLDEN_FRAME ? 0 : 1, - refs[1] <= GOLDEN_FRAME ? 
0 : 1 }; - int ref_searched[2] = { 0, 0 }; - int ref_mv_match[2] = { 1, 1 }; - int i, j; - - for (i = 0; i < 2; ++i) { - const SingleInterModeState *state = - search_state->single_state[mode_dir[i]][mode_offset[i]]; - const int state_cnt = - search_state->single_state_cnt[mode_dir[i]][mode_offset[i]]; - for (j = 0; j < state_cnt; ++j) { - if (state[j].ref_frame == refs[i]) { - ref_searched[i] = 1; - break; - } - } - } - - const int ref_set = get_drl_refmv_count(x, refs, this_mode); - for (i = 0; i < 2; ++i) { - if (mode[i] == NEARESTMV || mode[i] == NEARMV) { - const MV_REFERENCE_FRAME single_refs[2] = { refs[i], NONE_FRAME }; - int idential = 1; - for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ref_mv_idx++) { - int_mv single_mv; - int_mv comp_mv; - get_this_mv(&single_mv, mode[i], 0, ref_mv_idx, single_refs, - x->mbmi_ext); - get_this_mv(&comp_mv, this_mode, i, ref_mv_idx, refs, x->mbmi_ext); - - idential &= (single_mv.as_int == comp_mv.as_int); - if (!idential) { - ref_mv_match[i] = 0; - break; - } - } - } - } - - for (i = 0; i < 2; ++i) { - if (ref_searched[i] && ref_mv_match[i]) { - const int candidates = - compound_skip_get_candidates(cpi, search_state, mode_dir[i], mode[i]); - const MV_REFERENCE_FRAME *ref_order = - search_state->single_rd_order[mode_dir[i]][mode_offset[i]]; - int match = 0; - for (j = 0; j < candidates; ++j) { - if (refs[i] == ref_order[j]) { - match = 1; - break; - } - } - if (!match) return 1; - } - } - - return 0; -} - -static INLINE int sf_check_is_drop_ref(const MODE_DEFINITION *mode, - InterModeSearchState *search_state) { - const MV_REFERENCE_FRAME ref_frame = mode->ref_frame[0]; - const MV_REFERENCE_FRAME second_ref_frame = mode->ref_frame[1]; - if (search_state->num_available_refs > 2) { - if ((ref_frame == search_state->dist_order_refs[0] && - second_ref_frame == search_state->dist_order_refs[1]) || - (ref_frame == search_state->dist_order_refs[1] && - second_ref_frame == search_state->dist_order_refs[0])) - return 1; // drop this 
pair of refs - } - return 0; -} - -static INLINE void sf_drop_ref_analyze(InterModeSearchState *search_state, - const MODE_DEFINITION *mode, - int64_t distortion2) { - const PREDICTION_MODE this_mode = mode->mode; - MV_REFERENCE_FRAME ref_frame = mode->ref_frame[0]; - const int idx = ref_frame - LAST_FRAME; - if (idx && distortion2 > search_state->dist_refs[idx]) { - search_state->dist_refs[idx] = distortion2; - search_state->dist_order_refs[idx] = ref_frame; - } - - // Reach the last single ref prediction mode - if (ref_frame == ALTREF_FRAME && this_mode == GLOBALMV) { - // bubble sort dist_refs and the order index - for (int i = 0; i < REF_FRAMES; ++i) { - for (int k = i + 1; k < REF_FRAMES; ++k) { - if (search_state->dist_refs[i] < search_state->dist_refs[k]) { - int64_t tmp_dist = search_state->dist_refs[i]; - search_state->dist_refs[i] = search_state->dist_refs[k]; - search_state->dist_refs[k] = tmp_dist; - - int tmp_idx = search_state->dist_order_refs[i]; - search_state->dist_order_refs[i] = search_state->dist_order_refs[k]; - search_state->dist_order_refs[k] = tmp_idx; - } - } - } - for (int i = 0; i < REF_FRAMES; ++i) { - if (search_state->dist_refs[i] == -1) break; - search_state->num_available_refs = i; - } - search_state->num_available_refs++; - } -} - -static void alloc_compound_type_rd_buffers(AV1_COMMON *const cm, - CompoundTypeRdBuffers *const bufs) { - CHECK_MEM_ERROR( - cm, bufs->pred0, - (uint8_t *)aom_memalign(16, 2 * MAX_SB_SQUARE * sizeof(*bufs->pred0))); - CHECK_MEM_ERROR( - cm, bufs->pred1, - (uint8_t *)aom_memalign(16, 2 * MAX_SB_SQUARE * sizeof(*bufs->pred1))); - CHECK_MEM_ERROR( - cm, bufs->residual1, - (int16_t *)aom_memalign(32, MAX_SB_SQUARE * sizeof(*bufs->residual1))); - CHECK_MEM_ERROR( - cm, bufs->diff10, - (int16_t *)aom_memalign(32, MAX_SB_SQUARE * sizeof(*bufs->diff10))); - CHECK_MEM_ERROR(cm, bufs->tmp_best_mask_buf, - (uint8_t *)aom_malloc(2 * MAX_SB_SQUARE * - sizeof(*bufs->tmp_best_mask_buf))); -} - -static void 
release_compound_type_rd_buffers( - CompoundTypeRdBuffers *const bufs) { - aom_free(bufs->pred0); - aom_free(bufs->pred1); - aom_free(bufs->residual1); - aom_free(bufs->diff10); - aom_free(bufs->tmp_best_mask_buf); - av1_zero(*bufs); // Set all pointers to NULL for safety. -} - -void av1_rd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data, - MACROBLOCK *x, int mi_row, int mi_col, - RD_STATS *rd_cost, BLOCK_SIZE bsize, - PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far) { - AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - const SPEED_FEATURES *const sf = &cpi->sf; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - const int try_palette = - av1_allow_palette(cm->allow_screen_content_tools, mbmi->sb_type); - PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; - const struct segmentation *const seg = &cm->seg; - PREDICTION_MODE this_mode; - unsigned char segment_id = mbmi->segment_id; - int i; - struct buf_2d yv12_mb[REF_FRAMES][MAX_MB_PLANE]; - unsigned int ref_costs_single[REF_FRAMES]; - unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES]; - int *comp_inter_cost = x->comp_inter_cost[av1_get_reference_mode_context(xd)]; - int *mode_map = tile_data->mode_map[bsize]; - uint32_t mode_skip_mask[REF_FRAMES]; - uint16_t ref_frame_skip_mask[2]; - - InterModeSearchState search_state; - init_inter_mode_search_state(&search_state, cpi, tile_data, x, bsize, - best_rd_so_far); - INTERINTRA_MODE interintra_modes[REF_FRAMES] = { - INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, - INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES, INTERINTRA_MODES - }; - HandleInterModeArgs args = { - { NULL }, { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }, - { NULL }, { MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1, MAX_SB_SIZE >> 1 }, - NULL, NULL, - NULL, search_state.modelled_rd, - { { 0 } }, INT_MAX, - INT_MAX, search_state.simple_rd, - 0, interintra_modes - }; - for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] 
= INT_MAX; - - av1_invalid_rd_stats(rd_cost); - - // init params, set frame modes, speed features - set_params_rd_pick_inter_mode( - cpi, x, &args, bsize, mi_row, mi_col, ref_frame_skip_mask, mode_skip_mask, - ctx->skip_ref_frame_mask, ref_costs_single, ref_costs_comp, yv12_mb); - -#if CONFIG_COLLECT_INTER_MODE_RD_STATS - int64_t best_est_rd = INT64_MAX; - // TODO(angiebird): Turn this on when this speed feature is well tested -#if 1 - const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize]; - const int do_tx_search = !md->ready; -#else - const int do_tx_search = 1; -#endif - InterModesInfo *inter_modes_info = &tile_data->inter_modes_info; - inter_modes_info->num = 0; -#endif - - int intra_mode_num = 0; - int intra_mode_idx_ls[MAX_MODES]; - int reach_first_comp_mode = 0; - - // Temporary buffers used by handle_inter_mode(). - // We allocate them once and reuse it in every call to that function. - // Note: Must be allocated on the heap due to large size of the arrays. - uint8_t *tmp_buf_orig; - CHECK_MEM_ERROR( - cm, tmp_buf_orig, - (uint8_t *)aom_memalign(32, 2 * MAX_MB_PLANE * MAX_SB_SQUARE)); - uint8_t *const tmp_buf = get_buf_by_bd(xd, tmp_buf_orig); - - CompoundTypeRdBuffers rd_buffers; - alloc_compound_type_rd_buffers(cm, &rd_buffers); - - for (int midx = 0; midx < MAX_MODES; ++midx) { - int mode_index = mode_map[midx]; - int64_t this_rd = INT64_MAX; - int disable_skip = 0; - int rate2 = 0, rate_y = 0, rate_uv = 0; - int64_t distortion2 = 0; - int skippable = 0; - int this_skip2 = 0; - const MODE_DEFINITION *mode_order = &av1_mode_order[mode_index]; - const MV_REFERENCE_FRAME ref_frame = mode_order->ref_frame[0]; - const MV_REFERENCE_FRAME second_ref_frame = mode_order->ref_frame[1]; - const int comp_pred = second_ref_frame > INTRA_FRAME; - this_mode = mode_order->mode; - - init_mbmi(mbmi, mode_index, cm); - - x->skip = 0; - set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); - - // Reach the first compound prediction mode - if 
(sf->prune_comp_search_by_single_result > 0 && comp_pred && - reach_first_comp_mode == 0) { - analyze_single_states(cpi, &search_state); - reach_first_comp_mode = 1; - } - const int ret = inter_mode_search_order_independent_skip( - cpi, ctx, x, bsize, mode_index, mi_row, mi_col, mode_skip_mask, - ref_frame_skip_mask, &search_state); - if (ret == 1) continue; - args.skip_motion_mode = (ret == 2); - - if (sf->drop_ref && comp_pred) { - if (sf_check_is_drop_ref(mode_order, &search_state)) { - continue; - } - } - - if (search_state.best_rd < search_state.mode_threshold[mode_index]) - continue; - - if (sf->prune_comp_search_by_single_result > 0 && comp_pred) { - if (compound_skip_by_single_states(cpi, &search_state, this_mode, - ref_frame, second_ref_frame, x)) - continue; - } - - const int ref_frame_cost = comp_pred - ? ref_costs_comp[ref_frame][second_ref_frame] - : ref_costs_single[ref_frame]; - const int compmode_cost = - is_comp_ref_allowed(mbmi->sb_type) ? comp_inter_cost[comp_pred] : 0; - const int real_compmode_cost = - cm->reference_mode == REFERENCE_MODE_SELECT ? 
compmode_cost : 0; - - if (comp_pred) { - if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) && - search_state.best_mode_index >= 0 && - search_state.best_mbmode.ref_frame[0] == INTRA_FRAME) - continue; - } - - if (ref_frame == INTRA_FRAME) { - if (sf->adaptive_mode_search) - if ((x->source_variance << num_pels_log2_lookup[bsize]) > - search_state.best_pred_sse) - continue; - - if (this_mode != DC_PRED) { - // Only search the oblique modes if the best so far is - // one of the neighboring directional modes - if ((sf->mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) && - (this_mode >= D45_PRED && this_mode <= PAETH_PRED)) { - if (search_state.best_mode_index >= 0 && - search_state.best_mbmode.ref_frame[0] > INTRA_FRAME) - continue; - } - if (sf->mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) { - if (conditional_skipintra(this_mode, search_state.best_intra_mode)) - continue; - } - } - } - - // Select prediction reference frames. - for (i = 0; i < num_planes; i++) { - xd->plane[i].pre[0] = yv12_mb[ref_frame][i]; - if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i]; - } - - if (ref_frame == INTRA_FRAME) { - intra_mode_idx_ls[intra_mode_num++] = mode_index; - continue; - } else { - mbmi->angle_delta[PLANE_TYPE_Y] = 0; - mbmi->angle_delta[PLANE_TYPE_UV] = 0; - mbmi->filter_intra_mode_info.use_filter_intra = 0; - mbmi->ref_mv_idx = 0; - int64_t ref_best_rd = search_state.best_rd; - { - RD_STATS rd_stats, rd_stats_y, rd_stats_uv; - av1_init_rd_stats(&rd_stats); - rd_stats.rate = rate2; - - // Point to variables that are maintained between loop iterations - args.single_newmv = search_state.single_newmv; - args.single_newmv_rate = search_state.single_newmv_rate; - args.single_newmv_valid = search_state.single_newmv_valid; - args.single_comp_cost = real_compmode_cost; - args.ref_frame_cost = ref_frame_cost; -#if CONFIG_COLLECT_INTER_MODE_RD_STATS - this_rd = handle_inter_mode( - cpi, x, bsize, &rd_stats, &rd_stats_y, &rd_stats_uv, 
&disable_skip, - mi_row, mi_col, &args, ref_best_rd, tmp_buf, &rd_buffers, tile_data, - &best_est_rd, do_tx_search, inter_modes_info); -#else - this_rd = handle_inter_mode(cpi, x, bsize, &rd_stats, &rd_stats_y, - &rd_stats_uv, &disable_skip, mi_row, mi_col, - &args, ref_best_rd, tmp_buf, &rd_buffers); -#endif - rate2 = rd_stats.rate; - skippable = rd_stats.skip; - distortion2 = rd_stats.dist; - rate_y = rd_stats_y.rate; - rate_uv = rd_stats_uv.rate; - } - - if (sf->prune_comp_search_by_single_result > 0 && - is_inter_singleref_mode(this_mode)) { - collect_single_states(x, &search_state, mbmi); - } - - if (this_rd == INT64_MAX) continue; - - this_skip2 = mbmi->skip; - this_rd = RDCOST(x->rdmult, rate2, distortion2); - if (this_skip2) { - rate_y = 0; - rate_uv = 0; - } - } - - // Did this mode help.. i.e. is it the new best mode - if (this_rd < search_state.best_rd || x->skip) { - int mode_excluded = 0; - if (comp_pred) { - mode_excluded = cm->reference_mode == SINGLE_REFERENCE; - } - if (!mode_excluded) { - // Note index of best mode so far - search_state.best_mode_index = mode_index; - - if (ref_frame == INTRA_FRAME) { - /* required for left and above block mv */ - mbmi->mv[0].as_int = 0; - } else { - search_state.best_pred_sse = x->pred_sse[ref_frame]; - } - - rd_cost->rate = rate2; - rd_cost->dist = distortion2; - rd_cost->rdcost = this_rd; - search_state.best_rd = this_rd; - search_state.best_mbmode = *mbmi; - search_state.best_skip2 = this_skip2; - search_state.best_mode_skippable = skippable; -#if CONFIG_COLLECT_INTER_MODE_RD_STATS - if (do_tx_search) { - // When do_tx_search == 0, handle_inter_mode won't provide correct - // rate_y and rate_uv because txfm_search process is replaced by - // rd estimation. - // Therfore, we should avoid updating best_rate_y and best_rate_uv - // here. 
These two values will be updated when txfm_search is called - search_state.best_rate_y = - rate_y + - x->skip_cost[av1_get_skip_context(xd)][this_skip2 || skippable]; - search_state.best_rate_uv = rate_uv; - } -#else // CONFIG_COLLECT_INTER_MODE_RD_STATS - search_state.best_rate_y = - rate_y + - x->skip_cost[av1_get_skip_context(xd)][this_skip2 || skippable]; - search_state.best_rate_uv = rate_uv; -#endif // CONFIG_COLLECT_INTER_MODE_RD_STATS - memcpy(ctx->blk_skip, x->blk_skip, - sizeof(x->blk_skip[0]) * ctx->num_4x4_blk); - } - } - - /* keep record of best compound/single-only prediction */ - if (!disable_skip && ref_frame != INTRA_FRAME) { - int64_t single_rd, hybrid_rd, single_rate, hybrid_rate; - - if (cm->reference_mode == REFERENCE_MODE_SELECT) { - single_rate = rate2 - compmode_cost; - hybrid_rate = rate2; - } else { - single_rate = rate2; - hybrid_rate = rate2 + compmode_cost; - } - - single_rd = RDCOST(x->rdmult, single_rate, distortion2); - hybrid_rd = RDCOST(x->rdmult, hybrid_rate, distortion2); - - if (!comp_pred) { - if (single_rd < search_state.best_pred_rd[SINGLE_REFERENCE]) - search_state.best_pred_rd[SINGLE_REFERENCE] = single_rd; - } else { - if (single_rd < search_state.best_pred_rd[COMPOUND_REFERENCE]) - search_state.best_pred_rd[COMPOUND_REFERENCE] = single_rd; - } - if (hybrid_rd < search_state.best_pred_rd[REFERENCE_MODE_SELECT]) - search_state.best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd; - } - if (sf->drop_ref && second_ref_frame == NONE_FRAME) { - // Collect data from single ref mode, and analyze data. 
- sf_drop_ref_analyze(&search_state, mode_order, distortion2); - } - - if (x->skip && !comp_pred) break; - } - - aom_free(tmp_buf_orig); - tmp_buf_orig = NULL; - release_compound_type_rd_buffers(&rd_buffers); - -#if CONFIG_COLLECT_INTER_MODE_RD_STATS - if (!do_tx_search) { - inter_modes_info_sort(inter_modes_info, inter_modes_info->rd_idx_pair_arr); - search_state.best_rd = INT64_MAX; - - int64_t top_est_rd = - inter_modes_info->est_rd_arr[inter_modes_info->rd_idx_pair_arr[0].idx]; - for (int j = 0; j < inter_modes_info->num; ++j) { - const int data_idx = inter_modes_info->rd_idx_pair_arr[j].idx; - *mbmi = inter_modes_info->mbmi_arr[data_idx]; - int64_t curr_est_rd = inter_modes_info->est_rd_arr[data_idx]; - if (curr_est_rd * 0.9 > top_est_rd) { - continue; - } - const int mode_rate = inter_modes_info->mode_rate_arr[data_idx]; - - x->skip = 0; - set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]); - - // Select prediction reference frames. - const int is_comp_pred = mbmi->ref_frame[1] > INTRA_FRAME; - for (i = 0; i < num_planes; i++) { - xd->plane[i].pre[0] = yv12_mb[mbmi->ref_frame[0]][i]; - if (is_comp_pred) xd->plane[i].pre[1] = yv12_mb[mbmi->ref_frame[1]][i]; - } - - RD_STATS rd_stats; - RD_STATS rd_stats_y; - RD_STATS rd_stats_uv; - - av1_build_inter_predictors_sb(cm, xd, mi_row, mi_col, NULL, bsize); - if (mbmi->motion_mode == OBMC_CAUSAL) - av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col); - - if (!txfm_search(cpi, x, bsize, mi_row, mi_col, &rd_stats, &rd_stats_y, - &rd_stats_uv, mode_rate, search_state.best_rd)) { - continue; - } else { - const int skip_ctx = av1_get_skip_context(xd); - inter_mode_data_push(tile_data, mbmi->sb_type, rd_stats.sse, - rd_stats.dist, - rd_stats_y.rate + rd_stats_uv.rate + - x->skip_cost[skip_ctx][mbmi->skip]); - } - rd_stats.rdcost = RDCOST(x->rdmult, rd_stats.rate, rd_stats.dist); - - if (rd_stats.rdcost < search_state.best_rd) { - search_state.best_rd = rd_stats.rdcost; - // Note index of best mode so far - 
const int mode_index = get_prediction_mode_idx( - mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]); - search_state.best_mode_index = mode_index; - *rd_cost = rd_stats; - search_state.best_rd = rd_stats.rdcost; - search_state.best_mbmode = *mbmi; - search_state.best_skip2 = mbmi->skip; - search_state.best_mode_skippable = rd_stats.skip; - search_state.best_rate_y = - rd_stats_y.rate + - x->skip_cost[av1_get_skip_context(xd)][rd_stats.skip || mbmi->skip]; - search_state.best_rate_uv = rd_stats_uv.rate; - memcpy(ctx->blk_skip, x->blk_skip, - sizeof(x->blk_skip[0]) * ctx->num_4x4_blk); - } - } - } -#endif - - for (int j = 0; j < intra_mode_num; ++j) { - const int mode_index = intra_mode_idx_ls[j]; - const MV_REFERENCE_FRAME ref_frame = - av1_mode_order[mode_index].ref_frame[0]; - assert(av1_mode_order[mode_index].ref_frame[1] == NONE_FRAME); - assert(ref_frame == INTRA_FRAME); - if (sf->skip_intra_in_interframe && search_state.skip_intra_modes) break; - init_mbmi(mbmi, mode_index, cm); - x->skip = 0; - set_ref_ptrs(cm, xd, INTRA_FRAME, NONE_FRAME); - - // Select prediction reference frames. 
- for (i = 0; i < num_planes; i++) { - xd->plane[i].pre[0] = yv12_mb[ref_frame][i]; - } - - RD_STATS intra_rd_stats, intra_rd_stats_y, intra_rd_stats_uv; - - const int ref_frame_cost = ref_costs_single[ref_frame]; - intra_rd_stats.rdcost = handle_intra_mode( - &search_state, cpi, x, bsize, mi_row, mi_col, ref_frame_cost, ctx, 0, - &intra_rd_stats, &intra_rd_stats_y, &intra_rd_stats_uv); - if (intra_rd_stats.rdcost < search_state.best_rd) { - search_state.best_rd = intra_rd_stats.rdcost; - // Note index of best mode so far - search_state.best_mode_index = mode_index; - *rd_cost = intra_rd_stats; - search_state.best_rd = intra_rd_stats.rdcost; - search_state.best_mbmode = *mbmi; - search_state.best_skip2 = 0; - search_state.best_mode_skippable = intra_rd_stats.skip; - search_state.best_rate_y = - intra_rd_stats_y.rate + - x->skip_cost[av1_get_skip_context(xd)][intra_rd_stats.skip]; - search_state.best_rate_uv = intra_rd_stats_uv.rate; - memcpy(ctx->blk_skip, x->blk_skip, - sizeof(x->blk_skip[0]) * ctx->num_4x4_blk); - } - } - - // In effect only when speed >= 2. - sf_refine_fast_tx_type_search( - cpi, x, mi_row, mi_col, rd_cost, bsize, ctx, search_state.best_mode_index, - &search_state.best_mbmode, yv12_mb, search_state.best_rate_y, - search_state.best_rate_uv, &search_state.best_skip2); - - // Only try palette mode when the best mode so far is an intra mode. 
- if (try_palette && !is_inter_mode(search_state.best_mbmode.mode)) { - search_palette_mode(cpi, x, mi_row, mi_col, rd_cost, ctx, bsize, mbmi, pmi, - ref_costs_single, &search_state); - } - - search_state.best_mbmode.skip_mode = 0; - if (cm->skip_mode_flag && - !segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) && - is_comp_ref_allowed(bsize)) { - rd_pick_skip_mode(rd_cost, &search_state, cpi, x, bsize, mi_row, mi_col, - yv12_mb); - } - - // Make sure that the ref_mv_idx is only nonzero when we're - // using a mode which can support ref_mv_idx - if (search_state.best_mbmode.ref_mv_idx != 0 && - !(search_state.best_mbmode.mode == NEWMV || - search_state.best_mbmode.mode == NEW_NEWMV || - have_nearmv_in_inter_mode(search_state.best_mbmode.mode))) { - search_state.best_mbmode.ref_mv_idx = 0; - } - - if (search_state.best_mode_index < 0 || - search_state.best_rd >= best_rd_so_far) { - rd_cost->rate = INT_MAX; - rd_cost->rdcost = INT64_MAX; - return; - } - - assert( - (cm->interp_filter == SWITCHABLE) || - (cm->interp_filter == - av1_extract_interp_filter(search_state.best_mbmode.interp_filters, 0)) || - !is_inter_block(&search_state.best_mbmode)); - assert( - (cm->interp_filter == SWITCHABLE) || - (cm->interp_filter == - av1_extract_interp_filter(search_state.best_mbmode.interp_filters, 1)) || - !is_inter_block(&search_state.best_mbmode)); - - if (!cpi->rc.is_src_frame_alt_ref) - av1_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact, - sf->adaptive_rd_thresh, bsize, - search_state.best_mode_index); - - // macroblock modes - *mbmi = search_state.best_mbmode; - x->skip |= search_state.best_skip2; - - // Note: this section is needed since the mode may have been forced to - // GLOBALMV by the all-zero mode handling of ref-mv. 
- if (mbmi->mode == GLOBALMV || mbmi->mode == GLOBAL_GLOBALMV) { - // Correct the interp filters for GLOBALMV - if (is_nontrans_global_motion(xd, xd->mi[0])) { - assert(mbmi->interp_filters == - av1_broadcast_interp_filter( - av1_unswitchable_filter(cm->interp_filter))); - } - } - - for (i = 0; i < REFERENCE_MODES; ++i) { - if (search_state.best_pred_rd[i] == INT64_MAX) - search_state.best_pred_diff[i] = INT_MIN; - else - search_state.best_pred_diff[i] = - search_state.best_rd - search_state.best_pred_rd[i]; - } - - x->skip |= search_state.best_mode_skippable; - - assert(search_state.best_mode_index >= 0); - - store_coding_context(x, ctx, search_state.best_mode_index, - search_state.best_pred_diff, - search_state.best_mode_skippable); - - if (pmi->palette_size[1] > 0) { - assert(try_palette); - restore_uv_color_map(cpi, x); - } -} - -void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi, - TileDataEnc *tile_data, MACROBLOCK *x, - int mi_row, int mi_col, - RD_STATS *rd_cost, BLOCK_SIZE bsize, - PICK_MODE_CONTEXT *ctx, - int64_t best_rd_so_far) { - const AV1_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - unsigned char segment_id = mbmi->segment_id; - const int comp_pred = 0; - int i; - int64_t best_pred_diff[REFERENCE_MODES]; - unsigned int ref_costs_single[REF_FRAMES]; - unsigned int ref_costs_comp[REF_FRAMES][REF_FRAMES]; - int *comp_inter_cost = x->comp_inter_cost[av1_get_reference_mode_context(xd)]; - InterpFilter best_filter = SWITCHABLE; - int64_t this_rd = INT64_MAX; - int rate2 = 0; - const int64_t distortion2 = 0; - (void)mi_row; - (void)mi_col; - - av1_collect_neighbors_ref_counts(xd); - - estimate_ref_frame_costs(cm, xd, x, segment_id, ref_costs_single, - ref_costs_comp); - - for (i = 0; i < REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX; - for (i = LAST_FRAME; i < REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX; - - rd_cost->rate = INT_MAX; - - assert(segfeature_active(&cm->seg, segment_id, 
SEG_LVL_SKIP)); - - mbmi->palette_mode_info.palette_size[0] = 0; - mbmi->palette_mode_info.palette_size[1] = 0; - mbmi->filter_intra_mode_info.use_filter_intra = 0; - mbmi->mode = GLOBALMV; - mbmi->motion_mode = SIMPLE_TRANSLATION; - mbmi->uv_mode = UV_DC_PRED; - if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) - mbmi->ref_frame[0] = get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME); - else - mbmi->ref_frame[0] = LAST_FRAME; - mbmi->ref_frame[1] = NONE_FRAME; - mbmi->mv[0].as_int = - gm_get_motion_vector(&cm->global_motion[mbmi->ref_frame[0]], - cm->allow_high_precision_mv, bsize, mi_col, mi_row, - cm->cur_frame_force_integer_mv) - .as_int; - mbmi->tx_size = max_txsize_lookup[bsize]; - x->skip = 1; - - mbmi->ref_mv_idx = 0; - - mbmi->motion_mode = SIMPLE_TRANSLATION; - av1_count_overlappable_neighbors(cm, xd, mi_row, mi_col); - if (is_motion_variation_allowed_bsize(bsize) && !has_second_ref(mbmi)) { - int pts[SAMPLES_ARRAY_SIZE], pts_inref[SAMPLES_ARRAY_SIZE]; - mbmi->num_proj_ref = findSamples(cm, xd, mi_row, mi_col, pts, pts_inref); - // Select the samples according to motion vector difference - if (mbmi->num_proj_ref > 1) - mbmi->num_proj_ref = selectSamples(&mbmi->mv[0].as_mv, pts, pts_inref, - mbmi->num_proj_ref, bsize); - } - - set_default_interp_filters(mbmi, cm->interp_filter); - - if (cm->interp_filter != SWITCHABLE) { - best_filter = cm->interp_filter; - } else { - best_filter = EIGHTTAP_REGULAR; - if (av1_is_interp_needed(xd) && av1_is_interp_search_needed(xd) && - x->source_variance >= cpi->sf.disable_filter_search_var_thresh) { - int rs; - int best_rs = INT_MAX; - for (i = 0; i < SWITCHABLE_FILTERS; ++i) { - mbmi->interp_filters = av1_broadcast_interp_filter(i); - rs = av1_get_switchable_rate(cm, x, xd); - if (rs < best_rs) { - best_rs = rs; - best_filter = av1_extract_interp_filter(mbmi->interp_filters, 0); - } - } - } - } - // Set the appropriate filter - mbmi->interp_filters = av1_broadcast_interp_filter(best_filter); - rate2 += 
av1_get_switchable_rate(cm, x, xd); - - if (cm->reference_mode == REFERENCE_MODE_SELECT) - rate2 += comp_inter_cost[comp_pred]; - - // Estimate the reference frame signaling cost and add it - // to the rolling cost variable. - rate2 += ref_costs_single[LAST_FRAME]; - this_rd = RDCOST(x->rdmult, rate2, distortion2); - - rd_cost->rate = rate2; - rd_cost->dist = distortion2; - rd_cost->rdcost = this_rd; - - if (this_rd >= best_rd_so_far) { - rd_cost->rate = INT_MAX; - rd_cost->rdcost = INT64_MAX; - return; - } - - assert((cm->interp_filter == SWITCHABLE) || - (cm->interp_filter == - av1_extract_interp_filter(mbmi->interp_filters, 0))); - - av1_update_rd_thresh_fact(cm, tile_data->thresh_freq_fact, - cpi->sf.adaptive_rd_thresh, bsize, THR_GLOBALMV); - - av1_zero(best_pred_diff); - - store_coding_context(x, ctx, THR_GLOBALMV, best_pred_diff, 0); -} - -struct calc_target_weighted_pred_ctxt { - const MACROBLOCK *x; - const uint8_t *tmp; - int tmp_stride; - int overlap; -}; - -static INLINE void calc_target_weighted_pred_above( - MACROBLOCKD *xd, int rel_mi_col, uint8_t nb_mi_width, MB_MODE_INFO *nb_mi, - void *fun_ctxt, const int num_planes) { - (void)nb_mi; - (void)num_planes; - - struct calc_target_weighted_pred_ctxt *ctxt = - (struct calc_target_weighted_pred_ctxt *)fun_ctxt; - - const int bw = xd->n4_w << MI_SIZE_LOG2; - const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap); - - int32_t *wsrc = ctxt->x->wsrc_buf + (rel_mi_col * MI_SIZE); - int32_t *mask = ctxt->x->mask_buf + (rel_mi_col * MI_SIZE); - const uint8_t *tmp = ctxt->tmp + rel_mi_col * MI_SIZE; - const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 
1 : 0; - - if (!is_hbd) { - for (int row = 0; row < ctxt->overlap; ++row) { - const uint8_t m0 = mask1d[row]; - const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0; - for (int col = 0; col < nb_mi_width * MI_SIZE; ++col) { - wsrc[col] = m1 * tmp[col]; - mask[col] = m0; - } - wsrc += bw; - mask += bw; - tmp += ctxt->tmp_stride; - } - } else { - const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp); - - for (int row = 0; row < ctxt->overlap; ++row) { - const uint8_t m0 = mask1d[row]; - const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0; - for (int col = 0; col < nb_mi_width * MI_SIZE; ++col) { - wsrc[col] = m1 * tmp16[col]; - mask[col] = m0; - } - wsrc += bw; - mask += bw; - tmp16 += ctxt->tmp_stride; - } - } -} - -static INLINE void calc_target_weighted_pred_left( - MACROBLOCKD *xd, int rel_mi_row, uint8_t nb_mi_height, MB_MODE_INFO *nb_mi, - void *fun_ctxt, const int num_planes) { - (void)nb_mi; - (void)num_planes; - - struct calc_target_weighted_pred_ctxt *ctxt = - (struct calc_target_weighted_pred_ctxt *)fun_ctxt; - - const int bw = xd->n4_w << MI_SIZE_LOG2; - const uint8_t *const mask1d = av1_get_obmc_mask(ctxt->overlap); - - int32_t *wsrc = ctxt->x->wsrc_buf + (rel_mi_row * MI_SIZE * bw); - int32_t *mask = ctxt->x->mask_buf + (rel_mi_row * MI_SIZE * bw); - const uint8_t *tmp = ctxt->tmp + (rel_mi_row * MI_SIZE * ctxt->tmp_stride); - const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 
1 : 0; - - if (!is_hbd) { - for (int row = 0; row < nb_mi_height * MI_SIZE; ++row) { - for (int col = 0; col < ctxt->overlap; ++col) { - const uint8_t m0 = mask1d[col]; - const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0; - wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 + - (tmp[col] << AOM_BLEND_A64_ROUND_BITS) * m1; - mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0; - } - wsrc += bw; - mask += bw; - tmp += ctxt->tmp_stride; - } - } else { - const uint16_t *tmp16 = CONVERT_TO_SHORTPTR(tmp); - - for (int row = 0; row < nb_mi_height * MI_SIZE; ++row) { - for (int col = 0; col < ctxt->overlap; ++col) { - const uint8_t m0 = mask1d[col]; - const uint8_t m1 = AOM_BLEND_A64_MAX_ALPHA - m0; - wsrc[col] = (wsrc[col] >> AOM_BLEND_A64_ROUND_BITS) * m0 + - (tmp16[col] << AOM_BLEND_A64_ROUND_BITS) * m1; - mask[col] = (mask[col] >> AOM_BLEND_A64_ROUND_BITS) * m0; - } - wsrc += bw; - mask += bw; - tmp16 += ctxt->tmp_stride; - } - } -} - -// This function has a structure similar to av1_build_obmc_inter_prediction -// -// The OBMC predictor is computed as: -// -// PObmc(x,y) = -// AOM_BLEND_A64(Mh(x), -// AOM_BLEND_A64(Mv(y), P(x,y), PAbove(x,y)), -// PLeft(x, y)) -// -// Scaling up by AOM_BLEND_A64_MAX_ALPHA ** 2 and omitting the intermediate -// rounding, this can be written as: -// -// AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * Pobmc(x,y) = -// Mh(x) * Mv(y) * P(x,y) + -// Mh(x) * Cv(y) * Pabove(x,y) + -// AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y) -// -// Where : -// -// Cv(y) = AOM_BLEND_A64_MAX_ALPHA - Mv(y) -// Ch(y) = AOM_BLEND_A64_MAX_ALPHA - Mh(y) -// -// This function computes 'wsrc' and 'mask' as: -// -// wsrc(x, y) = -// AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA * src(x, y) - -// Mh(x) * Cv(y) * Pabove(x,y) + -// AOM_BLEND_A64_MAX_ALPHA * Ch(x) * PLeft(x, y) -// -// mask(x, y) = Mh(x) * Mv(y) -// -// These can then be used to efficiently approximate the error for any -// predictor P in the context of the provided neighbouring 
predictors by -// computing: -// -// error(x, y) = -// wsrc(x, y) - mask(x, y) * P(x, y) / (AOM_BLEND_A64_MAX_ALPHA ** 2) -// -static void calc_target_weighted_pred(const AV1_COMMON *cm, const MACROBLOCK *x, - const MACROBLOCKD *xd, int mi_row, - int mi_col, const uint8_t *above, - int above_stride, const uint8_t *left, - int left_stride) { - const BLOCK_SIZE bsize = xd->mi[0]->sb_type; - const int bw = xd->n4_w << MI_SIZE_LOG2; - const int bh = xd->n4_h << MI_SIZE_LOG2; - int32_t *mask_buf = x->mask_buf; - int32_t *wsrc_buf = x->wsrc_buf; - - const int is_hbd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 1 : 0; - const int src_scale = AOM_BLEND_A64_MAX_ALPHA * AOM_BLEND_A64_MAX_ALPHA; - - // plane 0 should not be subsampled - assert(xd->plane[0].subsampling_x == 0); - assert(xd->plane[0].subsampling_y == 0); - - av1_zero_array(wsrc_buf, bw * bh); - for (int i = 0; i < bw * bh; ++i) mask_buf[i] = AOM_BLEND_A64_MAX_ALPHA; - - // handle above row - if (xd->up_available) { - const int overlap = - AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1; - struct calc_target_weighted_pred_ctxt ctxt = { x, above, above_stride, - overlap }; - foreach_overlappable_nb_above(cm, (MACROBLOCKD *)xd, mi_col, - max_neighbor_obmc[mi_size_wide_log2[bsize]], - calc_target_weighted_pred_above, &ctxt); - } - - for (int i = 0; i < bw * bh; ++i) { - wsrc_buf[i] *= AOM_BLEND_A64_MAX_ALPHA; - mask_buf[i] *= AOM_BLEND_A64_MAX_ALPHA; - } - - // handle left column - if (xd->left_available) { - const int overlap = - AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1; - struct calc_target_weighted_pred_ctxt ctxt = { x, left, left_stride, - overlap }; - foreach_overlappable_nb_left(cm, (MACROBLOCKD *)xd, mi_row, - max_neighbor_obmc[mi_size_high_log2[bsize]], - calc_target_weighted_pred_left, &ctxt); - } - - if (!is_hbd) { - const uint8_t *src = x->plane[0].src.buf; - - for (int row = 0; row < bh; ++row) { - for (int col = 0; col < bw; ++col) { - wsrc_buf[col] = 
src[col] * src_scale - wsrc_buf[col]; - } - wsrc_buf += bw; - src += x->plane[0].src.stride; - } - } else { - const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[0].src.buf); - - for (int row = 0; row < bh; ++row) { - for (int col = 0; col < bw; ++col) { - wsrc_buf[col] = src[col] * src_scale - wsrc_buf[col]; - } - wsrc_buf += bw; - src += x->plane[0].src.stride; - } - } -} diff --git a/third_party/aom/av1/encoder/rdopt.h b/third_party/aom/av1/encoder/rdopt.h deleted file mode 100644 index 4c11f90b8..000000000 --- a/third_party/aom/av1/encoder/rdopt.h +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_ENCODER_RDOPT_H_ -#define AOM_AV1_ENCODER_RDOPT_H_ - -#include "av1/common/blockd.h" -#include "av1/common/txb_common.h" - -#include "av1/encoder/block.h" -#include "av1/encoder/context_tree.h" -#include "av1/encoder/encoder.h" -#include "av1/encoder/encodetxb.h" - -#ifdef __cplusplus -extern "C" { -#endif - -#define MAX_REF_MV_SERCH 3 -#define DEFAULT_LUMA_INTERP_SKIP_FLAG 1 -#define DEFAULT_CHROMA_INTERP_SKIP_FLAG 2 -#define DEFAULT_INTERP_SKIP_FLAG \ - (DEFAULT_LUMA_INTERP_SKIP_FLAG | DEFAULT_CHROMA_INTERP_SKIP_FLAG) - -struct TileInfo; -struct macroblock; -struct RD_STATS; - -#if CONFIG_RD_DEBUG -static INLINE void av1_update_txb_coeff_cost(RD_STATS *rd_stats, int plane, - TX_SIZE tx_size, int blk_row, - int blk_col, int txb_coeff_cost) { - (void)blk_row; - (void)blk_col; - (void)tx_size; - rd_stats->txb_coeff_cost[plane] += txb_coeff_cost; - - { - const int txb_h = tx_size_high_unit[tx_size]; - const int txb_w = tx_size_wide_unit[tx_size]; - int idx, idy; - for (idy = 0; idy < txb_h; ++idy) - for (idx = 0; idx < txb_w; ++idx) - rd_stats->txb_coeff_cost_map[plane][blk_row + idy][blk_col + idx] = 0; - - rd_stats->txb_coeff_cost_map[plane][blk_row][blk_col] = txb_coeff_cost; - } - assert(blk_row < TXB_COEFF_COST_MAP_SIZE); - assert(blk_col < TXB_COEFF_COST_MAP_SIZE); -} -#endif - -// Returns the number of colors in 'src'. -int av1_count_colors(const uint8_t *src, int stride, int rows, int cols, - int *val_count); -// Same as av1_count_colors(), but for high-bitdepth mode. 
-int av1_count_colors_highbd(const uint8_t *src8, int stride, int rows, int cols, - int bit_depth, int *val_count); - -#if CONFIG_DIST_8X8 -int64_t av1_dist_8x8(const struct AV1_COMP *const cpi, const MACROBLOCK *x, - const uint8_t *src, int src_stride, const uint8_t *dst, - int dst_stride, const BLOCK_SIZE tx_bsize, int bsw, - int bsh, int visible_w, int visible_h, int qindex); -#endif - -static INLINE int av1_cost_skip_txb(MACROBLOCK *x, const TXB_CTX *const txb_ctx, - int plane, TX_SIZE tx_size) { - const TX_SIZE txs_ctx = get_txsize_entropy_ctx(tx_size); - const PLANE_TYPE plane_type = get_plane_type(plane); - const LV_MAP_COEFF_COST *const coeff_costs = - &x->coeff_costs[txs_ctx][plane_type]; - return coeff_costs->txb_skip_cost[txb_ctx->txb_skip_ctx][1]; -} - -static INLINE int av1_cost_coeffs(const AV1_COMMON *const cm, MACROBLOCK *x, - int plane, int block, TX_SIZE tx_size, - const TX_TYPE tx_type, - const TXB_CTX *const txb_ctx, - int use_fast_coef_costing) { -#if TXCOEFF_COST_TIMER - struct aom_usec_timer timer; - aom_usec_timer_start(&timer); -#endif - (void)use_fast_coef_costing; - const int cost = - av1_cost_coeffs_txb(cm, x, plane, block, tx_size, tx_type, txb_ctx); -#if TXCOEFF_COST_TIMER - AV1_COMMON *tmp_cm = (AV1_COMMON *)&cpi->common; - aom_usec_timer_mark(&timer); - const int64_t elapsed_time = aom_usec_timer_elapsed(&timer); - tmp_cm->txcoeff_cost_timer += elapsed_time; - ++tmp_cm->txcoeff_cost_count; -#endif - return cost; -} - -void av1_rd_pick_intra_mode_sb(const struct AV1_COMP *cpi, struct macroblock *x, - int mi_row, int mi_col, struct RD_STATS *rd_cost, - BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, - int64_t best_rd); - -unsigned int av1_get_sby_perpixel_variance(const struct AV1_COMP *cpi, - const struct buf_2d *ref, - BLOCK_SIZE bs); -unsigned int av1_high_get_sby_perpixel_variance(const struct AV1_COMP *cpi, - const struct buf_2d *ref, - BLOCK_SIZE bs, int bd); - -void av1_rd_pick_inter_mode_sb(struct AV1_COMP *cpi, - struct TileDataEnc 
*tile_data, - struct macroblock *x, int mi_row, int mi_col, - struct RD_STATS *rd_cost, BLOCK_SIZE bsize, - PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far); - -void av1_rd_pick_inter_mode_sb_seg_skip( - const struct AV1_COMP *cpi, struct TileDataEnc *tile_data, - struct macroblock *x, int mi_row, int mi_col, struct RD_STATS *rd_cost, - BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far); - -#if CONFIG_COLLECT_INTER_MODE_RD_STATS -void av1_inter_mode_data_init(struct TileDataEnc *tile_data); -void av1_inter_mode_data_fit(TileDataEnc *tile_data, int rdmult); -#endif - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_RDOPT_H_ diff --git a/third_party/aom/av1/encoder/reconinter_enc.c b/third_party/aom/av1/encoder/reconinter_enc.c deleted file mode 100644 index 23d920fc3..000000000 --- a/third_party/aom/av1/encoder/reconinter_enc.c +++ /dev/null @@ -1,627 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include -#include -#include - -#include "config/aom_config.h" -#include "config/aom_dsp_rtcd.h" -#include "config/aom_scale_rtcd.h" - -#include "aom/aom_integer.h" -#include "aom_dsp/blend.h" - -#include "av1/common/blockd.h" -#include "av1/common/mvref_common.h" -#include "av1/common/reconinter.h" -#include "av1/common/reconintra.h" -#include "av1/common/onyxc_int.h" -#include "av1/common/obmc.h" -#include "av1/encoder/reconinter_enc.h" - -static INLINE void calc_subpel_params( - MACROBLOCKD *xd, const struct scale_factors *const sf, const MV mv, - int plane, const int pre_x, const int pre_y, int x, int y, - struct buf_2d *const pre_buf, uint8_t **pre, SubpelParams *subpel_params, - int bw, int bh) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - const int is_scaled = av1_is_scaled(sf); - if (is_scaled) { - int ssx = pd->subsampling_x; - int ssy = pd->subsampling_y; - int orig_pos_y = (pre_y + y) << SUBPEL_BITS; - orig_pos_y += mv.row * (1 << (1 - ssy)); - int orig_pos_x = (pre_x + x) << SUBPEL_BITS; - orig_pos_x += mv.col * (1 << (1 - ssx)); - int pos_y = sf->scale_value_y(orig_pos_y, sf); - int pos_x = sf->scale_value_x(orig_pos_x, sf); - pos_x += SCALE_EXTRA_OFF; - pos_y += SCALE_EXTRA_OFF; - - const int top = -AOM_LEFT_TOP_MARGIN_SCALED(ssy); - const int left = -AOM_LEFT_TOP_MARGIN_SCALED(ssx); - const int bottom = (pre_buf->height + AOM_INTERP_EXTEND) - << SCALE_SUBPEL_BITS; - const int right = (pre_buf->width + AOM_INTERP_EXTEND) << SCALE_SUBPEL_BITS; - pos_y = clamp(pos_y, top, bottom); - pos_x = clamp(pos_x, left, right); - - *pre = pre_buf->buf0 + (pos_y >> SCALE_SUBPEL_BITS) * pre_buf->stride + - (pos_x >> SCALE_SUBPEL_BITS); - subpel_params->subpel_x = pos_x & SCALE_SUBPEL_MASK; - subpel_params->subpel_y = pos_y & SCALE_SUBPEL_MASK; - subpel_params->xs = sf->x_step_q4; - subpel_params->ys = sf->y_step_q4; - } else { - const MV mv_q4 = clamp_mv_to_umv_border_sb( - xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y); - 
subpel_params->xs = subpel_params->ys = SCALE_SUBPEL_SHIFTS; - subpel_params->subpel_x = (mv_q4.col & SUBPEL_MASK) << SCALE_EXTRA_BITS; - subpel_params->subpel_y = (mv_q4.row & SUBPEL_MASK) << SCALE_EXTRA_BITS; - *pre = pre_buf->buf + (y + (mv_q4.row >> SUBPEL_BITS)) * pre_buf->stride + - (x + (mv_q4.col >> SUBPEL_BITS)); - } -} - -static INLINE void build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd, - int plane, const MB_MODE_INFO *mi, - int build_for_obmc, int bw, int bh, - int mi_x, int mi_y) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - int is_compound = has_second_ref(mi); - int ref; - const int is_intrabc = is_intrabc_block(mi); - assert(IMPLIES(is_intrabc, !is_compound)); - int is_global[2] = { 0, 0 }; - for (ref = 0; ref < 1 + is_compound; ++ref) { - const WarpedMotionParams *const wm = &xd->global_motion[mi->ref_frame[ref]]; - is_global[ref] = is_global_mv_block(mi, wm->wmtype); - } - - const BLOCK_SIZE bsize = mi->sb_type; - const int ss_x = pd->subsampling_x; - const int ss_y = pd->subsampling_y; - int sub8x8_inter = (block_size_wide[bsize] < 8 && ss_x) || - (block_size_high[bsize] < 8 && ss_y); - - if (is_intrabc) sub8x8_inter = 0; - - // For sub8x8 chroma blocks, we may be covering more than one luma block's - // worth of pixels. Thus (mi_x, mi_y) may not be the correct coordinates for - // the top-left corner of the prediction source - the correct top-left corner - // is at (pre_x, pre_y). - const int row_start = - (block_size_high[bsize] == 4) && ss_y && !build_for_obmc ? -1 : 0; - const int col_start = - (block_size_wide[bsize] == 4) && ss_x && !build_for_obmc ? 
-1 : 0; - const int pre_x = (mi_x + MI_SIZE * col_start) >> ss_x; - const int pre_y = (mi_y + MI_SIZE * row_start) >> ss_y; - - sub8x8_inter = sub8x8_inter && !build_for_obmc; - if (sub8x8_inter) { - for (int row = row_start; row <= 0 && sub8x8_inter; ++row) { - for (int col = col_start; col <= 0; ++col) { - const MB_MODE_INFO *this_mbmi = xd->mi[row * xd->mi_stride + col]; - if (!is_inter_block(this_mbmi)) sub8x8_inter = 0; - if (is_intrabc_block(this_mbmi)) sub8x8_inter = 0; - } - } - } - - if (sub8x8_inter) { - // block size - const int b4_w = block_size_wide[bsize] >> ss_x; - const int b4_h = block_size_high[bsize] >> ss_y; - const BLOCK_SIZE plane_bsize = scale_chroma_bsize(bsize, ss_x, ss_y); - const int b8_w = block_size_wide[plane_bsize] >> ss_x; - const int b8_h = block_size_high[plane_bsize] >> ss_y; - assert(!is_compound); - - const struct buf_2d orig_pred_buf[2] = { pd->pre[0], pd->pre[1] }; - - int row = row_start; - for (int y = 0; y < b8_h; y += b4_h) { - int col = col_start; - for (int x = 0; x < b8_w; x += b4_w) { - MB_MODE_INFO *this_mbmi = xd->mi[row * xd->mi_stride + col]; - is_compound = has_second_ref(this_mbmi); - int tmp_dst_stride = 8; - assert(bw < 8 || bh < 8); - ConvolveParams conv_params = get_conv_params_no_round( - 0, plane, xd->tmp_conv_dst, tmp_dst_stride, is_compound, xd->bd); - conv_params.use_jnt_comp_avg = 0; - struct buf_2d *const dst_buf = &pd->dst; - uint8_t *dst = dst_buf->buf + dst_buf->stride * y + x; - - ref = 0; - const RefBuffer *ref_buf = - &cm->frame_refs[this_mbmi->ref_frame[ref] - LAST_FRAME]; - - pd->pre[ref].buf0 = - (plane == 1) ? ref_buf->buf->u_buffer : ref_buf->buf->v_buffer; - pd->pre[ref].buf = - pd->pre[ref].buf0 + scaled_buffer_offset(pre_x, pre_y, - ref_buf->buf->uv_stride, - &ref_buf->sf); - pd->pre[ref].width = ref_buf->buf->uv_crop_width; - pd->pre[ref].height = ref_buf->buf->uv_crop_height; - pd->pre[ref].stride = ref_buf->buf->uv_stride; - - const struct scale_factors *const sf = - is_intrabc ? 
&cm->sf_identity : &ref_buf->sf; - struct buf_2d *const pre_buf = is_intrabc ? dst_buf : &pd->pre[ref]; - - const MV mv = this_mbmi->mv[ref].as_mv; - - uint8_t *pre; - SubpelParams subpel_params; - WarpTypesAllowed warp_types; - warp_types.global_warp_allowed = is_global[ref]; - warp_types.local_warp_allowed = this_mbmi->motion_mode == WARPED_CAUSAL; - - calc_subpel_params(xd, sf, mv, plane, pre_x, pre_y, x, y, pre_buf, &pre, - &subpel_params, bw, bh); - conv_params.do_average = ref; - if (is_masked_compound_type(mi->interinter_comp.type)) { - // masked compound type has its own average mechanism - conv_params.do_average = 0; - } - - av1_make_inter_predictor( - pre, pre_buf->stride, dst, dst_buf->stride, &subpel_params, sf, - b4_w, b4_h, &conv_params, this_mbmi->interp_filters, &warp_types, - (mi_x >> pd->subsampling_x) + x, (mi_y >> pd->subsampling_y) + y, - plane, ref, mi, build_for_obmc, xd, cm->allow_warped_motion); - - ++col; - } - ++row; - } - - for (ref = 0; ref < 2; ++ref) pd->pre[ref] = orig_pred_buf[ref]; - return; - } - - { - ConvolveParams conv_params = get_conv_params_no_round( - 0, plane, xd->tmp_conv_dst, MAX_SB_SIZE, is_compound, xd->bd); - av1_jnt_comp_weight_assign(cm, mi, 0, &conv_params.fwd_offset, - &conv_params.bck_offset, - &conv_params.use_jnt_comp_avg, is_compound); - - struct buf_2d *const dst_buf = &pd->dst; - uint8_t *const dst = dst_buf->buf; - for (ref = 0; ref < 1 + is_compound; ++ref) { - const struct scale_factors *const sf = - is_intrabc ? &cm->sf_identity : &xd->block_refs[ref]->sf; - struct buf_2d *const pre_buf = is_intrabc ? 
dst_buf : &pd->pre[ref]; - const MV mv = mi->mv[ref].as_mv; - - uint8_t *pre; - SubpelParams subpel_params; - calc_subpel_params(xd, sf, mv, plane, pre_x, pre_y, 0, 0, pre_buf, &pre, - &subpel_params, bw, bh); - - WarpTypesAllowed warp_types; - warp_types.global_warp_allowed = is_global[ref]; - warp_types.local_warp_allowed = mi->motion_mode == WARPED_CAUSAL; - - if (ref && is_masked_compound_type(mi->interinter_comp.type)) { - // masked compound type has its own average mechanism - conv_params.do_average = 0; - av1_make_masked_inter_predictor( - pre, pre_buf->stride, dst, dst_buf->stride, &subpel_params, sf, bw, - bh, &conv_params, mi->interp_filters, plane, &warp_types, - mi_x >> pd->subsampling_x, mi_y >> pd->subsampling_y, ref, xd, - cm->allow_warped_motion); - } else { - conv_params.do_average = ref; - av1_make_inter_predictor( - pre, pre_buf->stride, dst, dst_buf->stride, &subpel_params, sf, bw, - bh, &conv_params, mi->interp_filters, &warp_types, - mi_x >> pd->subsampling_x, mi_y >> pd->subsampling_y, plane, ref, - mi, build_for_obmc, xd, cm->allow_warped_motion); - } - } - } -} - -static void build_inter_predictors_for_planes(const AV1_COMMON *cm, - MACROBLOCKD *xd, BLOCK_SIZE bsize, - int mi_row, int mi_col, - int plane_from, int plane_to) { - int plane; - const int mi_x = mi_col * MI_SIZE; - const int mi_y = mi_row * MI_SIZE; - for (plane = plane_from; plane <= plane_to; ++plane) { - const struct macroblockd_plane *pd = &xd->plane[plane]; - const int bw = pd->width; - const int bh = pd->height; - - if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x, - pd->subsampling_y)) - continue; - - build_inter_predictors(cm, xd, plane, xd->mi[0], 0, bw, bh, mi_x, mi_y); - } -} - -void av1_build_inter_predictors_sby(const AV1_COMMON *cm, MACROBLOCKD *xd, - int mi_row, int mi_col, BUFFER_SET *ctx, - BLOCK_SIZE bsize) { - av1_build_inter_predictors_sbp(cm, xd, mi_row, mi_col, ctx, bsize, 0); -} - -void av1_build_inter_predictors_sbuv(const AV1_COMMON *cm, 
MACROBLOCKD *xd, - int mi_row, int mi_col, BUFFER_SET *ctx, - BLOCK_SIZE bsize) { - for (int plane_idx = 1; plane_idx < MAX_MB_PLANE; plane_idx++) { - av1_build_inter_predictors_sbp(cm, xd, mi_row, mi_col, ctx, bsize, - plane_idx); - } -} - -void av1_build_inter_predictors_sbp(const AV1_COMMON *cm, MACROBLOCKD *xd, - int mi_row, int mi_col, BUFFER_SET *ctx, - BLOCK_SIZE bsize, int plane_idx) { - build_inter_predictors_for_planes(cm, xd, bsize, mi_row, mi_col, plane_idx, - plane_idx); - - if (is_interintra_pred(xd->mi[0])) { - BUFFER_SET default_ctx = { { NULL, NULL, NULL }, { 0, 0, 0 } }; - if (!ctx) { - default_ctx.plane[plane_idx] = xd->plane[plane_idx].dst.buf; - default_ctx.stride[plane_idx] = xd->plane[plane_idx].dst.stride; - ctx = &default_ctx; - } - av1_build_interintra_predictors_sbp(cm, xd, xd->plane[plane_idx].dst.buf, - xd->plane[plane_idx].dst.stride, ctx, - plane_idx, bsize); - } -} - -void av1_build_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd, - int mi_row, int mi_col, BUFFER_SET *ctx, - BLOCK_SIZE bsize) { - const int num_planes = av1_num_planes(cm); - av1_build_inter_predictors_sby(cm, xd, mi_row, mi_col, ctx, bsize); - if (num_planes > 1) - av1_build_inter_predictors_sbuv(cm, xd, mi_row, mi_col, ctx, bsize); -} - -// TODO(sarahparker): -// av1_build_inter_predictor should be combined with -// av1_make_inter_predictor -void av1_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, - int dst_stride, const MV *src_mv, - const struct scale_factors *sf, int w, int h, - ConvolveParams *conv_params, - InterpFilters interp_filters, - const WarpTypesAllowed *warp_types, int p_col, - int p_row, int plane, int ref, - enum mv_precision precision, int x, int y, - const MACROBLOCKD *xd, int can_use_previous) { - const int is_q4 = precision == MV_PRECISION_Q4; - const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2, - is_q4 ? 
src_mv->col : src_mv->col * 2 }; - MV32 mv = av1_scale_mv(&mv_q4, x, y, sf); - mv.col += SCALE_EXTRA_OFF; - mv.row += SCALE_EXTRA_OFF; - - const SubpelParams subpel_params = { sf->x_step_q4, sf->y_step_q4, - mv.col & SCALE_SUBPEL_MASK, - mv.row & SCALE_SUBPEL_MASK }; - src += (mv.row >> SCALE_SUBPEL_BITS) * src_stride + - (mv.col >> SCALE_SUBPEL_BITS); - - av1_make_inter_predictor(src, src_stride, dst, dst_stride, &subpel_params, sf, - w, h, conv_params, interp_filters, warp_types, p_col, - p_row, plane, ref, xd->mi[0], 0, xd, - can_use_previous); -} - -static INLINE void build_prediction_by_above_pred( - MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width, - MB_MODE_INFO *above_mbmi, void *fun_ctxt, const int num_planes) { - struct build_prediction_ctxt *ctxt = (struct build_prediction_ctxt *)fun_ctxt; - const int above_mi_col = ctxt->mi_col + rel_mi_col; - int mi_x, mi_y; - MB_MODE_INFO backup_mbmi = *above_mbmi; - - av1_setup_build_prediction_by_above_pred(xd, rel_mi_col, above_mi_width, - above_mbmi, ctxt, num_planes); - mi_x = above_mi_col << MI_SIZE_LOG2; - mi_y = ctxt->mi_row << MI_SIZE_LOG2; - - const BLOCK_SIZE bsize = xd->mi[0]->sb_type; - - for (int j = 0; j < num_planes; ++j) { - const struct macroblockd_plane *pd = &xd->plane[j]; - int bw = (above_mi_width * MI_SIZE) >> pd->subsampling_x; - int bh = clamp(block_size_high[bsize] >> (pd->subsampling_y + 1), 4, - block_size_high[BLOCK_64X64] >> (pd->subsampling_y + 1)); - - if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue; - build_inter_predictors(ctxt->cm, xd, j, above_mbmi, 1, bw, bh, mi_x, mi_y); - } - *above_mbmi = backup_mbmi; -} - -void av1_build_prediction_by_above_preds(const AV1_COMMON *cm, MACROBLOCKD *xd, - int mi_row, int mi_col, - uint8_t *tmp_buf[MAX_MB_PLANE], - int tmp_width[MAX_MB_PLANE], - int tmp_height[MAX_MB_PLANE], - int tmp_stride[MAX_MB_PLANE]) { - if (!xd->up_available) return; - - // Adjust mb_to_bottom_edge to have the correct value for the OBMC - // prediction block. 
This is half the height of the original block, - // except for 128-wide blocks, where we only use a height of 32. - int this_height = xd->n4_h * MI_SIZE; - int pred_height = AOMMIN(this_height / 2, 32); - xd->mb_to_bottom_edge += (this_height - pred_height) * 8; - - struct build_prediction_ctxt ctxt = { cm, mi_row, - mi_col, tmp_buf, - tmp_width, tmp_height, - tmp_stride, xd->mb_to_right_edge }; - BLOCK_SIZE bsize = xd->mi[0]->sb_type; - foreach_overlappable_nb_above(cm, xd, mi_col, - max_neighbor_obmc[mi_size_wide_log2[bsize]], - build_prediction_by_above_pred, &ctxt); - - xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8); - xd->mb_to_right_edge = ctxt.mb_to_far_edge; - xd->mb_to_bottom_edge -= (this_height - pred_height) * 8; -} - -static INLINE void build_prediction_by_left_pred( - MACROBLOCKD *xd, int rel_mi_row, uint8_t left_mi_height, - MB_MODE_INFO *left_mbmi, void *fun_ctxt, const int num_planes) { - struct build_prediction_ctxt *ctxt = (struct build_prediction_ctxt *)fun_ctxt; - const int left_mi_row = ctxt->mi_row + rel_mi_row; - int mi_x, mi_y; - MB_MODE_INFO backup_mbmi = *left_mbmi; - - av1_setup_build_prediction_by_left_pred(xd, rel_mi_row, left_mi_height, - left_mbmi, ctxt, num_planes); - mi_x = ctxt->mi_col << MI_SIZE_LOG2; - mi_y = left_mi_row << MI_SIZE_LOG2; - const BLOCK_SIZE bsize = xd->mi[0]->sb_type; - - for (int j = 0; j < num_planes; ++j) { - const struct macroblockd_plane *pd = &xd->plane[j]; - int bw = clamp(block_size_wide[bsize] >> (pd->subsampling_x + 1), 4, - block_size_wide[BLOCK_64X64] >> (pd->subsampling_x + 1)); - int bh = (left_mi_height << MI_SIZE_LOG2) >> pd->subsampling_y; - - if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue; - build_inter_predictors(ctxt->cm, xd, j, left_mbmi, 1, bw, bh, mi_x, mi_y); - } - *left_mbmi = backup_mbmi; -} - -void av1_build_prediction_by_left_preds(const AV1_COMMON *cm, MACROBLOCKD *xd, - int mi_row, int mi_col, - uint8_t *tmp_buf[MAX_MB_PLANE], - int tmp_width[MAX_MB_PLANE], - int 
tmp_height[MAX_MB_PLANE], - int tmp_stride[MAX_MB_PLANE]) { - if (!xd->left_available) return; - - // Adjust mb_to_right_edge to have the correct value for the OBMC - // prediction block. This is half the width of the original block, - // except for 128-wide blocks, where we only use a width of 32. - int this_width = xd->n4_w * MI_SIZE; - int pred_width = AOMMIN(this_width / 2, 32); - xd->mb_to_right_edge += (this_width - pred_width) * 8; - - struct build_prediction_ctxt ctxt = { cm, mi_row, - mi_col, tmp_buf, - tmp_width, tmp_height, - tmp_stride, xd->mb_to_bottom_edge }; - BLOCK_SIZE bsize = xd->mi[0]->sb_type; - foreach_overlappable_nb_left(cm, xd, mi_row, - max_neighbor_obmc[mi_size_high_log2[bsize]], - build_prediction_by_left_pred, &ctxt); - - xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8); - xd->mb_to_right_edge -= (this_width - pred_width) * 8; - xd->mb_to_bottom_edge = ctxt.mb_to_far_edge; -} - -void av1_build_obmc_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd, - int mi_row, int mi_col) { - const int num_planes = av1_num_planes(cm); - uint8_t *dst_buf1[MAX_MB_PLANE], *dst_buf2[MAX_MB_PLANE]; - int dst_stride1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; - int dst_stride2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; - int dst_width1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; - int dst_width2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; - int dst_height1[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; - int dst_height2[MAX_MB_PLANE] = { MAX_SB_SIZE, MAX_SB_SIZE, MAX_SB_SIZE }; - - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - int len = sizeof(uint16_t); - dst_buf1[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0]); - dst_buf1[1] = - CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * len); - dst_buf1[2] = - CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * 2 * len); - dst_buf2[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1]); - dst_buf2[1] = - 
CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * len); - dst_buf2[2] = - CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2 * len); - } else { - dst_buf1[0] = xd->tmp_obmc_bufs[0]; - dst_buf1[1] = xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE; - dst_buf1[2] = xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * 2; - dst_buf2[0] = xd->tmp_obmc_bufs[1]; - dst_buf2[1] = xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE; - dst_buf2[2] = xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2; - } - av1_build_prediction_by_above_preds(cm, xd, mi_row, mi_col, dst_buf1, - dst_width1, dst_height1, dst_stride1); - av1_build_prediction_by_left_preds(cm, xd, mi_row, mi_col, dst_buf2, - dst_width2, dst_height2, dst_stride2); - av1_setup_dst_planes(xd->plane, xd->mi[0]->sb_type, get_frame_new_buffer(cm), - mi_row, mi_col, 0, num_planes); - av1_build_obmc_inter_prediction(cm, xd, mi_row, mi_col, dst_buf1, dst_stride1, - dst_buf2, dst_stride2); -} - -// Builds the inter-predictor for the single ref case -// for use in the encoder to search the wedges efficiently. 
-static void build_inter_predictors_single_buf(MACROBLOCKD *xd, int plane, - int bw, int bh, int x, int y, - int w, int h, int mi_x, int mi_y, - int ref, uint8_t *const ext_dst, - int ext_dst_stride, - int can_use_previous) { - struct macroblockd_plane *const pd = &xd->plane[plane]; - const MB_MODE_INFO *mi = xd->mi[0]; - - const struct scale_factors *const sf = &xd->block_refs[ref]->sf; - struct buf_2d *const pre_buf = &pd->pre[ref]; - uint8_t *const dst = get_buf_by_bd(xd, ext_dst) + ext_dst_stride * y + x; - const MV mv = mi->mv[ref].as_mv; - - ConvolveParams conv_params = get_conv_params(0, plane, xd->bd); - WarpTypesAllowed warp_types; - const WarpedMotionParams *const wm = &xd->global_motion[mi->ref_frame[ref]]; - warp_types.global_warp_allowed = is_global_mv_block(mi, wm->wmtype); - warp_types.local_warp_allowed = mi->motion_mode == WARPED_CAUSAL; - const int pre_x = (mi_x) >> pd->subsampling_x; - const int pre_y = (mi_y) >> pd->subsampling_y; - uint8_t *pre; - SubpelParams subpel_params; - calc_subpel_params(xd, sf, mv, plane, pre_x, pre_y, x, y, pre_buf, &pre, - &subpel_params, bw, bh); - - av1_make_inter_predictor(pre, pre_buf->stride, dst, ext_dst_stride, - &subpel_params, sf, w, h, &conv_params, - mi->interp_filters, &warp_types, pre_x + x, - pre_y + y, plane, ref, mi, 0, xd, can_use_previous); -} - -void av1_build_inter_predictors_for_planes_single_buf( - MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane_from, int plane_to, int mi_row, - int mi_col, int ref, uint8_t *ext_dst[3], int ext_dst_stride[3], - int can_use_previous) { - int plane; - const int mi_x = mi_col * MI_SIZE; - const int mi_y = mi_row * MI_SIZE; - for (plane = plane_from; plane <= plane_to; ++plane) { - const BLOCK_SIZE plane_bsize = get_plane_block_size( - bsize, xd->plane[plane].subsampling_x, xd->plane[plane].subsampling_y); - const int bw = block_size_wide[plane_bsize]; - const int bh = block_size_high[plane_bsize]; - build_inter_predictors_single_buf(xd, plane, bw, bh, 0, 0, bw, bh, 
mi_x, - mi_y, ref, ext_dst[plane], - ext_dst_stride[plane], can_use_previous); - } -} - -static void build_masked_compound( - uint8_t *dst, int dst_stride, const uint8_t *src0, int src0_stride, - const uint8_t *src1, int src1_stride, - const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h, - int w) { - // Derive subsampling from h and w passed in. May be refactored to - // pass in subsampling factors directly. - const int subh = (2 << mi_size_high_log2[sb_type]) == h; - const int subw = (2 << mi_size_wide_log2[sb_type]) == w; - const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type); - aom_blend_a64_mask(dst, dst_stride, src0, src0_stride, src1, src1_stride, - mask, block_size_wide[sb_type], w, h, subw, subh); -} - -static void build_masked_compound_highbd( - uint8_t *dst_8, int dst_stride, const uint8_t *src0_8, int src0_stride, - const uint8_t *src1_8, int src1_stride, - const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h, - int w, int bd) { - // Derive subsampling from h and w passed in. May be refactored to - // pass in subsampling factors directly. 
- const int subh = (2 << mi_size_high_log2[sb_type]) == h; - const int subw = (2 << mi_size_wide_log2[sb_type]) == w; - const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type); - // const uint8_t *mask = - // av1_get_contiguous_soft_mask(wedge_index, wedge_sign, sb_type); - aom_highbd_blend_a64_mask(dst_8, dst_stride, src0_8, src0_stride, src1_8, - src1_stride, mask, block_size_wide[sb_type], w, h, - subw, subh, bd); -} - -static void build_wedge_inter_predictor_from_buf( - MACROBLOCKD *xd, int plane, int x, int y, int w, int h, uint8_t *ext_dst0, - int ext_dst_stride0, uint8_t *ext_dst1, int ext_dst_stride1) { - MB_MODE_INFO *const mbmi = xd->mi[0]; - const int is_compound = has_second_ref(mbmi); - MACROBLOCKD_PLANE *const pd = &xd->plane[plane]; - struct buf_2d *const dst_buf = &pd->dst; - uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x; - mbmi->interinter_comp.seg_mask = xd->seg_mask; - const INTERINTER_COMPOUND_DATA *comp_data = &mbmi->interinter_comp; - - if (is_compound && is_masked_compound_type(comp_data->type)) { - if (!plane && comp_data->type == COMPOUND_DIFFWTD) { - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) - av1_build_compound_diffwtd_mask_highbd( - comp_data->seg_mask, comp_data->mask_type, - CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0, - CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1, h, w, xd->bd); - else - av1_build_compound_diffwtd_mask( - comp_data->seg_mask, comp_data->mask_type, ext_dst0, - ext_dst_stride0, ext_dst1, ext_dst_stride1, h, w); - } - - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) - build_masked_compound_highbd( - dst, dst_buf->stride, CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0, - CONVERT_TO_BYTEPTR(ext_dst1), ext_dst_stride1, comp_data, - mbmi->sb_type, h, w, xd->bd); - else - build_masked_compound(dst, dst_buf->stride, ext_dst0, ext_dst_stride0, - ext_dst1, ext_dst_stride1, comp_data, mbmi->sb_type, - h, w); - } else { - if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) - 
aom_highbd_convolve_copy(CONVERT_TO_BYTEPTR(ext_dst0), ext_dst_stride0, - dst, dst_buf->stride, NULL, 0, NULL, 0, w, h, - xd->bd); - else - aom_convolve_copy(ext_dst0, ext_dst_stride0, dst, dst_buf->stride, NULL, - 0, NULL, 0, w, h); - } -} - -void av1_build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, BLOCK_SIZE bsize, - int plane_from, int plane_to, - uint8_t *ext_dst0[3], - int ext_dst_stride0[3], - uint8_t *ext_dst1[3], - int ext_dst_stride1[3]) { - int plane; - for (plane = plane_from; plane <= plane_to; ++plane) { - const BLOCK_SIZE plane_bsize = get_plane_block_size( - bsize, xd->plane[plane].subsampling_x, xd->plane[plane].subsampling_y); - const int bw = block_size_wide[plane_bsize]; - const int bh = block_size_high[plane_bsize]; - build_wedge_inter_predictor_from_buf( - xd, plane, 0, 0, bw, bh, ext_dst0[plane], ext_dst_stride0[plane], - ext_dst1[plane], ext_dst_stride1[plane]); - } -} diff --git a/third_party/aom/av1/encoder/reconinter_enc.h b/third_party/aom/av1/encoder/reconinter_enc.h deleted file mode 100644 index 10d5e8c28..000000000 --- a/third_party/aom/av1/encoder/reconinter_enc.h +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_ENCODER_RECONINTER_ENC_H_ -#define AOM_AV1_ENCODER_RECONINTER_ENC_H_ - -#include "aom/aom_integer.h" -#include "av1/common/filter.h" -#include "av1/common/blockd.h" -#include "av1/common/onyxc_int.h" -#include "av1/common/convolve.h" -#include "av1/common/warped_motion.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void av1_build_inter_predictors_sby(const AV1_COMMON *cm, MACROBLOCKD *xd, - int mi_row, int mi_col, BUFFER_SET *ctx, - BLOCK_SIZE bsize); - -void av1_build_inter_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd, - int mi_row, int mi_col, BUFFER_SET *ctx, - BLOCK_SIZE bsize); - -void av1_build_inter_predictors_sbp(const AV1_COMMON *cm, MACROBLOCKD *xd, - int mi_row, int mi_col, BUFFER_SET *ctx, - BLOCK_SIZE bsize, int plane_idx); - -void av1_build_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd, - int mi_row, int mi_col, BUFFER_SET *ctx, - BLOCK_SIZE bsize); - -void av1_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, - int dst_stride, const MV *src_mv, - const struct scale_factors *sf, int w, int h, - ConvolveParams *conv_params, - InterpFilters interp_filters, - const WarpTypesAllowed *warp_types, int p_col, - int p_row, int plane, int ref, - enum mv_precision precision, int x, int y, - const MACROBLOCKD *xd, int can_use_previous); - -// Detect if the block have sub-pixel level motion vectors -// per component. 
-#define CHECK_SUBPEL 0 -static INLINE int has_subpel_mv_component(const MB_MODE_INFO *const mbmi, - const MACROBLOCKD *const xd, - int dir) { -#if CHECK_SUBPEL - const BLOCK_SIZE bsize = mbmi->sb_type; - int plane; - int ref = (dir >> 1); - - if (dir & 0x01) { - if (mbmi->mv[ref].as_mv.col & SUBPEL_MASK) return 1; - } else { - if (mbmi->mv[ref].as_mv.row & SUBPEL_MASK) return 1; - } - - return 0; -#else - (void)mbmi; - (void)xd; - (void)dir; - return 1; -#endif -} - -static INLINE int av1_is_interp_search_needed(const MACROBLOCKD *const xd) { - MB_MODE_INFO *const mi = xd->mi[0]; - const int is_compound = has_second_ref(mi); - int ref; - for (ref = 0; ref < 1 + is_compound; ++ref) { - int row_col; - for (row_col = 0; row_col < 2; ++row_col) { - const int dir = (ref << 1) + row_col; - if (has_subpel_mv_component(mi, xd, dir)) { - return 1; - } - } - } - return 0; -} - -void av1_build_prediction_by_above_preds(const AV1_COMMON *cm, MACROBLOCKD *xd, - int mi_row, int mi_col, - uint8_t *tmp_buf[MAX_MB_PLANE], - int tmp_width[MAX_MB_PLANE], - int tmp_height[MAX_MB_PLANE], - int tmp_stride[MAX_MB_PLANE]); - -void av1_build_prediction_by_left_preds(const AV1_COMMON *cm, MACROBLOCKD *xd, - int mi_row, int mi_col, - uint8_t *tmp_buf[MAX_MB_PLANE], - int tmp_width[MAX_MB_PLANE], - int tmp_height[MAX_MB_PLANE], - int tmp_stride[MAX_MB_PLANE]); - -void av1_build_obmc_inter_predictors_sb(const AV1_COMMON *cm, MACROBLOCKD *xd, - int mi_row, int mi_col); - -void av1_build_inter_predictors_for_planes_single_buf( - MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane_from, int plane_to, int mi_row, - int mi_col, int ref, uint8_t *ext_dst[3], int ext_dst_stride[3], - int can_use_previous); - -void av1_build_wedge_inter_predictor_from_buf(MACROBLOCKD *xd, BLOCK_SIZE bsize, - int plane_from, int plane_to, - uint8_t *ext_dst0[3], - int ext_dst_stride0[3], - uint8_t *ext_dst1[3], - int ext_dst_stride1[3]); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // 
AOM_AV1_ENCODER_RECONINTER_ENC_H_ diff --git a/third_party/aom/av1/encoder/segmentation.c b/third_party/aom/av1/encoder/segmentation.c deleted file mode 100644 index 2e9102745..000000000 --- a/third_party/aom/av1/encoder/segmentation.c +++ /dev/null @@ -1,244 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include - -#include "aom_mem/aom_mem.h" - -#include "av1/common/pred_common.h" -#include "av1/common/tile_common.h" - -#include "av1/encoder/cost.h" -#include "av1/encoder/segmentation.h" - -void av1_enable_segmentation(struct segmentation *seg) { - seg->enabled = 1; - seg->update_map = 1; - seg->update_data = 1; - seg->temporal_update = 0; -} - -void av1_disable_segmentation(struct segmentation *seg) { - seg->enabled = 0; - seg->update_map = 0; - seg->update_data = 0; - seg->temporal_update = 0; -} - -void av1_disable_segfeature(struct segmentation *seg, int segment_id, - SEG_LVL_FEATURES feature_id) { - seg->feature_mask[segment_id] &= ~(1 << feature_id); -} - -void av1_clear_segdata(struct segmentation *seg, int segment_id, - SEG_LVL_FEATURES feature_id) { - seg->feature_data[segment_id][feature_id] = 0; -} - -static void count_segs(const AV1_COMMON *cm, MACROBLOCKD *xd, - const TileInfo *tile, MB_MODE_INFO **mi, - unsigned *no_pred_segcounts, - unsigned (*temporal_predictor_count)[2], - unsigned *t_unpred_seg_counts, int bw, int bh, - int mi_row, int mi_col) { - int segment_id; - - if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - - xd->mi = mi; - 
segment_id = xd->mi[0]->segment_id; - - set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); - - // Count the number of hits on each segment with no prediction - no_pred_segcounts[segment_id]++; - - // Temporal prediction not allowed on key frames - if (cm->frame_type != KEY_FRAME) { - const BLOCK_SIZE bsize = xd->mi[0]->sb_type; - // Test to see if the segment id matches the predicted value. - const int pred_segment_id = - cm->last_frame_seg_map - ? get_segment_id(cm, cm->last_frame_seg_map, bsize, mi_row, mi_col) - : 0; - const int pred_flag = pred_segment_id == segment_id; - const int pred_context = av1_get_pred_context_seg_id(xd); - - // Store the prediction status for this mb and update counts - // as appropriate - xd->mi[0]->seg_id_predicted = pred_flag; - temporal_predictor_count[pred_context][pred_flag]++; - - // Update the "unpredicted" segment count - if (!pred_flag) t_unpred_seg_counts[segment_id]++; - } -} - -static void count_segs_sb(const AV1_COMMON *cm, MACROBLOCKD *xd, - const TileInfo *tile, MB_MODE_INFO **mi, - unsigned *no_pred_segcounts, - unsigned (*temporal_predictor_count)[2], - unsigned *t_unpred_seg_counts, int mi_row, int mi_col, - BLOCK_SIZE bsize) { - const int mis = cm->mi_stride; - const int bs = mi_size_wide[bsize], hbs = bs / 2; - PARTITION_TYPE partition; - const int qbs = bs / 4; - - if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - -#define CSEGS(cs_bw, cs_bh, cs_rowoff, cs_coloff) \ - count_segs(cm, xd, tile, mi + mis * (cs_rowoff) + (cs_coloff), \ - no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts, \ - (cs_bw), (cs_bh), mi_row + (cs_rowoff), mi_col + (cs_coloff)); - - if (bsize == BLOCK_8X8) - partition = PARTITION_NONE; - else - partition = get_partition(cm, mi_row, mi_col, bsize); - switch (partition) { - case PARTITION_NONE: CSEGS(bs, bs, 0, 0); break; - case PARTITION_HORZ: - CSEGS(bs, hbs, 0, 0); - CSEGS(bs, hbs, hbs, 0); - break; - case PARTITION_VERT: - CSEGS(hbs, bs, 0, 
0); - CSEGS(hbs, bs, 0, hbs); - break; - case PARTITION_HORZ_A: - CSEGS(hbs, hbs, 0, 0); - CSEGS(hbs, hbs, 0, hbs); - CSEGS(bs, hbs, hbs, 0); - break; - case PARTITION_HORZ_B: - CSEGS(bs, hbs, 0, 0); - CSEGS(hbs, hbs, hbs, 0); - CSEGS(hbs, hbs, hbs, hbs); - break; - case PARTITION_VERT_A: - CSEGS(hbs, hbs, 0, 0); - CSEGS(hbs, hbs, hbs, 0); - CSEGS(hbs, bs, 0, hbs); - break; - case PARTITION_VERT_B: - CSEGS(hbs, bs, 0, 0); - CSEGS(hbs, hbs, 0, hbs); - CSEGS(hbs, hbs, hbs, hbs); - break; - case PARTITION_HORZ_4: - CSEGS(bs, qbs, 0, 0); - CSEGS(bs, qbs, qbs, 0); - CSEGS(bs, qbs, 2 * qbs, 0); - if (mi_row + 3 * qbs < cm->mi_rows) CSEGS(bs, qbs, 3 * qbs, 0); - break; - - case PARTITION_VERT_4: - CSEGS(qbs, bs, 0, 0); - CSEGS(qbs, bs, 0, qbs); - CSEGS(qbs, bs, 0, 2 * qbs); - if (mi_col + 3 * qbs < cm->mi_cols) CSEGS(qbs, bs, 0, 3 * qbs); - break; - - case PARTITION_SPLIT: { - const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT); - int n; - - for (n = 0; n < 4; n++) { - const int mi_dc = hbs * (n & 1); - const int mi_dr = hbs * (n >> 1); - - count_segs_sb(cm, xd, tile, &mi[mi_dr * mis + mi_dc], no_pred_segcounts, - temporal_predictor_count, t_unpred_seg_counts, - mi_row + mi_dr, mi_col + mi_dc, subsize); - } - } break; - default: assert(0); - } - -#undef CSEGS -} - -void av1_choose_segmap_coding_method(AV1_COMMON *cm, MACROBLOCKD *xd) { - struct segmentation *seg = &cm->seg; - struct segmentation_probs *segp = &cm->fc->seg; - int no_pred_cost; - int t_pred_cost = INT_MAX; - int tile_col, tile_row, mi_row, mi_col; - unsigned temporal_predictor_count[SEG_TEMPORAL_PRED_CTXS][2] = { { 0 } }; - unsigned no_pred_segcounts[MAX_SEGMENTS] = { 0 }; - unsigned t_unpred_seg_counts[MAX_SEGMENTS] = { 0 }; - (void)xd; - - // First of all generate stats regarding how well the last segment map - // predicts this one - for (tile_row = 0; tile_row < cm->tile_rows; tile_row++) { - TileInfo tile_info; - av1_tile_set_row(&tile_info, cm, tile_row); - for (tile_col = 0; 
tile_col < cm->tile_cols; tile_col++) { - MB_MODE_INFO **mi_ptr; - av1_tile_set_col(&tile_info, cm, tile_col); - mi_ptr = cm->mi_grid_visible + tile_info.mi_row_start * cm->mi_stride + - tile_info.mi_col_start; - for (mi_row = tile_info.mi_row_start; mi_row < tile_info.mi_row_end; - mi_row += cm->seq_params.mib_size, - mi_ptr += cm->seq_params.mib_size * cm->mi_stride) { - MB_MODE_INFO **mi = mi_ptr; - for (mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end; - mi_col += cm->seq_params.mib_size, mi += cm->seq_params.mib_size) { - count_segs_sb(cm, xd, &tile_info, mi, no_pred_segcounts, - temporal_predictor_count, t_unpred_seg_counts, mi_row, - mi_col, cm->seq_params.sb_size); - } - } - } - } - - int seg_id_cost[MAX_SEGMENTS]; - av1_cost_tokens_from_cdf(seg_id_cost, segp->tree_cdf, NULL); - no_pred_cost = 0; - for (int i = 0; i < MAX_SEGMENTS; ++i) - no_pred_cost += no_pred_segcounts[i] * seg_id_cost[i]; - - // Frames without past dependency cannot use temporal prediction - if (cm->primary_ref_frame != PRIMARY_REF_NONE) { - int pred_flag_cost[SEG_TEMPORAL_PRED_CTXS][2]; - for (int i = 0; i < SEG_TEMPORAL_PRED_CTXS; ++i) - av1_cost_tokens_from_cdf(pred_flag_cost[i], segp->pred_cdf[i], NULL); - t_pred_cost = 0; - // Cost for signaling the prediction flag. - for (int i = 0; i < SEG_TEMPORAL_PRED_CTXS; ++i) { - for (int j = 0; j < 2; ++j) - t_pred_cost += temporal_predictor_count[i][j] * pred_flag_cost[i][j]; - } - // Cost for signaling the unpredicted segment id. - for (int i = 0; i < MAX_SEGMENTS; ++i) - t_pred_cost += t_unpred_seg_counts[i] * seg_id_cost[i]; - } - - // Now choose which coding method to use. 
- if (t_pred_cost < no_pred_cost) { - assert(!cm->error_resilient_mode); - seg->temporal_update = 1; - } else { - seg->temporal_update = 0; - } -} - -void av1_reset_segment_features(AV1_COMMON *cm) { - struct segmentation *seg = &cm->seg; - - // Set up default state for MB feature flags - seg->enabled = 0; - seg->update_map = 0; - seg->update_data = 0; - av1_clearall_segfeatures(seg); -} diff --git a/third_party/aom/av1/encoder/segmentation.h b/third_party/aom/av1/encoder/segmentation.h deleted file mode 100644 index 1ad13d66a..000000000 --- a/third_party/aom/av1/encoder/segmentation.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_ENCODER_SEGMENTATION_H_ -#define AOM_AV1_ENCODER_SEGMENTATION_H_ - -#include "av1/common/blockd.h" -#include "av1/encoder/encoder.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void av1_enable_segmentation(struct segmentation *seg); -void av1_disable_segmentation(struct segmentation *seg); - -void av1_disable_segfeature(struct segmentation *seg, int segment_id, - SEG_LVL_FEATURES feature_id); -void av1_clear_segdata(struct segmentation *seg, int segment_id, - SEG_LVL_FEATURES feature_id); - -void av1_choose_segmap_coding_method(AV1_COMMON *cm, MACROBLOCKD *xd); - -void av1_reset_segment_features(AV1_COMMON *cm); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_SEGMENTATION_H_ diff --git a/third_party/aom/av1/encoder/speed_features.c b/third_party/aom/av1/encoder/speed_features.c deleted file mode 100644 index 4c35baae0..000000000 --- a/third_party/aom/av1/encoder/speed_features.c +++ /dev/null @@ -1,564 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include - -#include "av1/encoder/encoder.h" -#include "av1/encoder/speed_features.h" -#include "av1/encoder/rdopt.h" - -#include "aom_dsp/aom_dsp_common.h" - -// Setting this to 1 will disable trellis optimization completely. -// Setting this to 2 will disable trellis optimization within the -// transform search. Trellis optimization will still be applied -// in the final encode. 
-#define DISABLE_TRELLISQ_SEARCH 0 - -#define MAX_MESH_SPEED 5 // Max speed setting for mesh motion method -static MESH_PATTERN - good_quality_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = { - { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } }, - { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } }, - { { 64, 8 }, { 14, 2 }, { 7, 1 }, { 7, 1 } }, - { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } }, - { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } }, - { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } }, - }; -static unsigned char good_quality_max_mesh_pct[MAX_MESH_SPEED + 1] = { - 50, 50, 25, 15, 5, 1 -}; - -// TODO(huisu@google.com): These settings are pretty relaxed, tune them for -// each speed setting -static MESH_PATTERN intrabc_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = { - { { 256, 1 }, { 256, 1 }, { 0, 0 }, { 0, 0 } }, - { { 256, 1 }, { 256, 1 }, { 0, 0 }, { 0, 0 } }, - { { 64, 1 }, { 64, 1 }, { 0, 0 }, { 0, 0 } }, - { { 64, 1 }, { 64, 1 }, { 0, 0 }, { 0, 0 } }, - { { 64, 4 }, { 16, 1 }, { 0, 0 }, { 0, 0 } }, - { { 64, 4 }, { 16, 1 }, { 0, 0 }, { 0, 0 } }, -}; -static uint8_t intrabc_max_mesh_pct[MAX_MESH_SPEED + 1] = { 100, 100, 100, - 25, 25, 10 }; - -// Intra only frames, golden frames (except alt ref overlays) and -// alt ref frames tend to be coded at a higher than ambient quality -static int frame_is_boosted(const AV1_COMP *cpi) { - return frame_is_kf_gf_arf(cpi); -} - -// Sets a partition size down to which the auto partition code will always -// search (can go lower), based on the image dimensions. The logic here -// is that the extent to which ringing artefacts are offensive, depends -// partly on the screen area that over which they propogate. Propogation is -// limited by transform block size but the screen area take up by a given block -// size will be larger for a small image format stretched to full screen. 
-static BLOCK_SIZE set_partition_min_limit(AV1_COMMON *const cm) { - unsigned int screen_area = (cm->width * cm->height); - - // Select block size based on image format size. - if (screen_area < 1280 * 720) { - // Formats smaller in area than 720P - return BLOCK_4X4; - } else if (screen_area < 1920 * 1080) { - // Format >= 720P and < 1080P - return BLOCK_8X8; - } else { - // Formats 1080P and up - return BLOCK_16X16; - } -} - -// Do we have an internal image edge (e.g. formatting bars). -static int has_internal_image_edge(const AV1_COMP *cpi) { - return (cpi->oxcf.pass == 2) && - ((cpi->twopass.this_frame_stats.inactive_zone_rows > 0) || - (cpi->twopass.this_frame_stats.inactive_zone_cols > 0)); -} - -static void set_good_speed_feature_framesize_dependent(AV1_COMP *cpi, - SPEED_FEATURES *sf, - int speed) { - AV1_COMMON *const cm = &cpi->common; - const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720; - const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480; - - if (is_480p_or_larger) { - sf->use_square_partition_only_threshold = BLOCK_128X128; - } else { - sf->use_square_partition_only_threshold = BLOCK_64X64; - } - - // TODO(huisu@google.com): train models for 720P and above. 
- if (!is_720p_or_larger) { - sf->ml_partition_search_breakout_thresh[0] = 200; // BLOCK_8X8 - sf->ml_partition_search_breakout_thresh[1] = 250; // BLOCK_16X16 - sf->ml_partition_search_breakout_thresh[2] = 300; // BLOCK_32X32 - sf->ml_partition_search_breakout_thresh[3] = 500; // BLOCK_64X64 - sf->ml_partition_search_breakout_thresh[4] = -1; // BLOCK_128X128 - } - - if (speed >= 1) { - if (is_720p_or_larger) { - sf->use_square_partition_only_threshold = BLOCK_128X128; - } else if (is_480p_or_larger) { - sf->use_square_partition_only_threshold = BLOCK_64X64; - } else { - sf->use_square_partition_only_threshold = BLOCK_32X32; - } - - if (!is_720p_or_larger) { - sf->ml_partition_search_breakout_thresh[0] = 200; // BLOCK_8X8 - sf->ml_partition_search_breakout_thresh[1] = 250; // BLOCK_16X16 - sf->ml_partition_search_breakout_thresh[2] = 300; // BLOCK_32X32 - sf->ml_partition_search_breakout_thresh[3] = 300; // BLOCK_64X64 - sf->ml_partition_search_breakout_thresh[4] = -1; // BLOCK_128X128 - } - } - - if (speed >= 2) { - if (is_720p_or_larger) { - sf->disable_split_mask = - cm->show_frame ? 
DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; - sf->adaptive_pred_interp_filter = 0; - sf->partition_search_breakout_dist_thr = (1 << 24); - sf->partition_search_breakout_rate_thr = 120; - } else { - sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; - sf->partition_search_breakout_dist_thr = (1 << 22); - sf->partition_search_breakout_rate_thr = 100; - } - sf->rd_auto_partition_min_limit = set_partition_min_limit(cm); - } - - if (speed >= 3) { - if (is_720p_or_larger) { - sf->disable_split_mask = DISABLE_ALL_SPLIT; - sf->partition_search_breakout_dist_thr = (1 << 25); - sf->partition_search_breakout_rate_thr = 200; - } else { - sf->max_intra_bsize = BLOCK_32X32; - sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; - sf->partition_search_breakout_dist_thr = (1 << 23); - sf->partition_search_breakout_rate_thr = 120; - } - } - - // If this is a two pass clip that fits the criteria for animated or - // graphics content then reset disable_split_mask for speeds 2+. - // Also if the image edge is internal to the coded area. - if ((speed >= 2) && (cpi->oxcf.pass == 2) && - ((cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) || - (has_internal_image_edge(cpi)))) { - sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; - } - - if (speed >= 4) { - if (is_720p_or_larger) { - sf->partition_search_breakout_dist_thr = (1 << 26); - } else { - sf->partition_search_breakout_dist_thr = (1 << 24); - } - sf->disable_split_mask = DISABLE_ALL_SPLIT; - } -} - -static void set_good_speed_features_framesize_independent(AV1_COMP *cpi, - SPEED_FEATURES *sf, - int speed) { - AV1_COMMON *const cm = &cpi->common; - const int boosted = frame_is_boosted(cpi); - - // Speed 0 for all speed features that give neutral coding performance change. 
- sf->reduce_inter_modes = 1; - sf->prune_ext_partition_types_search_level = 1; - sf->ml_prune_rect_partition = 1; - sf->ml_prune_ab_partition = 1; - sf->ml_prune_4_partition = 1; - sf->adaptive_txb_search_level = 1; - sf->jnt_comp_skip_mv_search = 1; - sf->model_based_prune_tx_search_level = 1; - sf->model_based_post_interp_filter_breakout = 1; - sf->inter_mode_rd_model_estimation = 1; - sf->prune_ref_frame_for_rect_partitions = - !(boosted || cpi->refresh_bwd_ref_frame || cpi->refresh_alt2_ref_frame); - sf->less_rectangular_check_level = 1; - sf->gm_search_type = GM_REDUCED_REF_SEARCH; - sf->gm_disable_recode = 1; - - if (speed >= 1) { - sf->gm_erroradv_type = GM_ERRORADV_TR_1; - sf->selective_ref_frame = 1; - sf->inter_tx_size_search_init_depth_rect = 1; - sf->inter_tx_size_search_init_depth_sqr = 1; - sf->intra_tx_size_search_init_depth_rect = 1; - sf->intra_tx_size_search_init_depth_sqr = 1; - sf->tx_size_search_lgr_block = 1; - if (speed >= CONFIG_2PASS_PARTITION_SEARCH_LVL) { - sf->two_pass_partition_search = 1; - sf->mode_pruning_based_on_two_pass_partition_search = 1; - } - sf->prune_ext_partition_types_search_level = 2; - sf->use_fast_interpolation_filter_search = 1; - sf->skip_repeat_interpolation_filter_search = 1; - sf->tx_type_search.skip_tx_search = 1; - sf->tx_type_search.ml_tx_split_thresh = 40; - sf->model_based_prune_tx_search_level = 0; - sf->model_based_post_interp_filter_breakout = 0; - // TODO(angiebird): Re-evaluate the impact of inter_mode_rd_model_estimation - // on speed 1 - sf->inter_mode_rd_model_estimation = 0; - sf->adaptive_txb_search_level = 2; - sf->use_intra_txb_hash = 1; - sf->optimize_b_precheck = 1; - sf->dual_sgr_penalty_level = 1; - sf->use_accurate_subpel_search = 1; - sf->reuse_inter_intra_mode = 1; - sf->prune_comp_search_by_single_result = 1; - sf->skip_repeated_newmv = 1; - sf->obmc_full_pixel_search_level = 1; - } - - if (speed >= 2) { - sf->gm_erroradv_type = GM_ERRORADV_TR_2; - - sf->selective_ref_frame = 2; - 
sf->fast_cdef_search = 1; - - sf->adaptive_rd_thresh = 1; - sf->mv.auto_mv_step_size = 1; - sf->mv.subpel_iters_per_step = 1; - sf->disable_filter_search_var_thresh = 100; - sf->comp_inter_joint_search_thresh = BLOCK_SIZES_ALL; - - sf->partition_search_breakout_rate_thr = 80; - // Note: This speed feature is disable as it seems to be worse in - // compression/quality and is also slower. - // sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; - sf->allow_partition_search_skip = 1; - sf->disable_wedge_search_var_thresh = 100; - sf->fast_wedge_sign_estimate = 1; - } - - if (speed >= 3) { - sf->tx_size_search_method = boosted ? USE_FULL_RD : USE_LARGESTALL; - sf->less_rectangular_check_level = 2; - sf->adaptive_pred_interp_filter = 1; - // adaptive_motion_search breaks encoder multi-thread tests. - // The values in x->pred_mv[] differ for single and multi-thread cases. - // See aomedia:1778. - // sf->adaptive_motion_search = 1; - sf->recode_loop = ALLOW_RECODE_KFARFGF; - sf->use_transform_domain_distortion = 1; - sf->use_accurate_subpel_search = 0; - sf->adaptive_rd_thresh = 2; - sf->tx_type_search.prune_mode = PRUNE_2D_FAST; - sf->gm_search_type = GM_DISABLE_SEARCH; - sf->prune_comp_search_by_single_result = 2; - } - - if (speed >= 4) { - sf->tx_type_search.fast_intra_tx_type_search = 1; - sf->tx_type_search.fast_inter_tx_type_search = 1; - sf->use_square_partition_only_threshold = - boosted ? BLOCK_128X128 : BLOCK_4X4; - sf->tx_size_search_method = - frame_is_intra_only(cm) ? 
USE_FULL_RD : USE_LARGESTALL; - sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED; - sf->adaptive_pred_interp_filter = 0; - sf->adaptive_mode_search = 1; - sf->cb_partition_search = !boosted; - sf->alt_ref_search_fp = 1; - } - - if (speed >= 5) { - sf->recode_loop = ALLOW_RECODE_KFMAXBW; - sf->intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC_H_V_CFL; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC_H_V_CFL; - sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->intra_uv_mode_mask[TX_16X16] = UV_INTRA_DC_H_V_CFL; - sf->use_square_partition_only_threshold = BLOCK_4X4; - sf->tx_size_search_method = USE_LARGESTALL; - sf->mv.search_method = BIGDIA; - sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED_MORE; - sf->adaptive_rd_thresh = 4; - sf->mode_search_skip_flags = - (cm->frame_type == KEY_FRAME) - ? 0 - : FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER | - FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR | - FLAG_EARLY_TERMINATE; - sf->disable_filter_search_var_thresh = 200; - sf->use_fast_coef_costing = 1; - sf->partition_search_breakout_rate_thr = 300; - sf->use_transform_domain_distortion = 2; - } - - if (speed >= 6) { - int i; - sf->optimize_coefficients = NO_TRELLIS_OPT; - sf->mv.search_method = HEX; - sf->disable_filter_search_var_thresh = 500; - for (i = 0; i < TX_SIZES; ++i) { - sf->intra_y_mode_mask[i] = INTRA_DC; - sf->intra_uv_mode_mask[i] = UV_INTRA_DC_CFL; - } - sf->partition_search_breakout_rate_thr = 500; - sf->mv.reduce_first_step_size = 1; - sf->simple_model_rd_from_var = 1; - } - if (speed >= 7) { - sf->default_max_partition_size = BLOCK_32X32; - sf->default_min_partition_size = BLOCK_8X8; - sf->intra_y_mode_mask[TX_64X64] = INTRA_DC; - sf->intra_y_mode_mask[TX_32X32] = INTRA_DC; - sf->frame_parameter_update = 0; - sf->mv.search_method = FAST_HEX; - sf->partition_search_type = REFERENCE_PARTITION; - sf->mode_search_skip_flags |= 
FLAG_SKIP_INTRA_DIRMISMATCH; - } - if (speed >= 8) { - sf->mv.search_method = FAST_DIAMOND; - sf->mv.subpel_force_stop = 2; - sf->lpf_pick = LPF_PICK_MINIMAL_LPF; - } -} - -void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi) { - SPEED_FEATURES *const sf = &cpi->sf; - const AV1EncoderConfig *const oxcf = &cpi->oxcf; - RD_OPT *const rd = &cpi->rd; - int i; - - if (oxcf->mode == GOOD) { - set_good_speed_feature_framesize_dependent(cpi, sf, oxcf->speed); - } - - if (sf->disable_split_mask == DISABLE_ALL_SPLIT) { - sf->adaptive_pred_interp_filter = 0; - } - - // Check for masked out split cases. - for (i = 0; i < MAX_REFS; ++i) { - if (sf->disable_split_mask & (1 << i)) { - rd->thresh_mult_sub8x8[i] = INT_MAX; - } - } - - // This is only used in motion vector unit test. - if (cpi->oxcf.motion_vector_unit_test == 1) - cpi->find_fractional_mv_step = av1_return_max_sub_pixel_mv; - else if (cpi->oxcf.motion_vector_unit_test == 2) - cpi->find_fractional_mv_step = av1_return_min_sub_pixel_mv; -} - -void av1_set_speed_features_framesize_independent(AV1_COMP *cpi) { - AV1_COMMON *const cm = &cpi->common; - SPEED_FEATURES *const sf = &cpi->sf; - MACROBLOCK *const x = &cpi->td.mb; - const AV1EncoderConfig *const oxcf = &cpi->oxcf; - int i; - - // best quality defaults - sf->frame_parameter_update = 1; - sf->mv.search_method = NSTEP; - sf->recode_loop = ALLOW_RECODE; - sf->mv.subpel_search_method = SUBPEL_TREE; - sf->mv.subpel_iters_per_step = 2; - sf->mv.subpel_force_stop = 0; -#if DISABLE_TRELLISQ_SEARCH == 2 - sf->optimize_coefficients = !is_lossless_requested(&cpi->oxcf) - ? 
FINAL_PASS_TRELLIS_OPT - : NO_TRELLIS_OPT; -#elif DISABLE_TRELLISQ_SEARCH == 1 - sf->optimize_coefficients = NO_TRELLIS_OPT; -#else - if (is_lossless_requested(&cpi->oxcf)) - sf->optimize_coefficients = NO_TRELLIS_OPT; - else - sf->optimize_coefficients = FULL_TRELLIS_OPT; -#endif // DISABLE_TRELLISQ_SEARCH - sf->gm_erroradv_type = GM_ERRORADV_TR_0; - sf->mv.reduce_first_step_size = 0; - sf->mv.auto_mv_step_size = 0; - sf->comp_inter_joint_search_thresh = BLOCK_4X4; - sf->adaptive_rd_thresh = 0; - sf->tx_size_search_method = USE_FULL_RD; - sf->inter_tx_size_search_init_depth_sqr = 0; - sf->inter_tx_size_search_init_depth_rect = 0; - sf->intra_tx_size_search_init_depth_rect = 0; - sf->intra_tx_size_search_init_depth_sqr = 0; - sf->tx_size_search_lgr_block = 0; - sf->model_based_prune_tx_search_level = 0; - sf->model_based_post_interp_filter_breakout = 0; - sf->reduce_inter_modes = 0; - sf->selective_ref_gm = 1; - sf->adaptive_motion_search = 0; - sf->adaptive_pred_interp_filter = 0; - sf->adaptive_mode_search = 0; - sf->cb_partition_search = 0; - sf->alt_ref_search_fp = 0; - sf->partition_search_type = SEARCH_PARTITION; - sf->tx_type_search.prune_mode = PRUNE_2D_ACCURATE; - sf->tx_type_search.ml_tx_split_thresh = 30; - sf->tx_type_search.use_skip_flag_prediction = 1; - sf->tx_type_search.fast_intra_tx_type_search = 0; - sf->tx_type_search.fast_inter_tx_type_search = 0; - sf->tx_type_search.skip_tx_search = 0; - sf->selective_ref_frame = 0; - sf->less_rectangular_check_level = 0; - sf->use_square_partition_only_threshold = BLOCK_128X128; - sf->prune_ref_frame_for_rect_partitions = 0; - sf->auto_min_max_partition_size = NOT_IN_USE; - sf->rd_auto_partition_min_limit = BLOCK_4X4; - sf->default_max_partition_size = BLOCK_LARGEST; - sf->default_min_partition_size = BLOCK_4X4; - sf->adjust_partitioning_from_last_frame = 0; - sf->disable_split_mask = 0; - sf->mode_search_skip_flags = 0; - sf->disable_filter_search_var_thresh = 0; - sf->allow_partition_search_skip = 0; - 
sf->use_accurate_subpel_search = 2; - sf->disable_wedge_search_var_thresh = 0; - sf->fast_wedge_sign_estimate = 0; - sf->drop_ref = 0; - sf->skip_intra_in_interframe = 1; - sf->txb_split_cap = 1; - sf->adaptive_txb_search_level = 0; - sf->two_pass_partition_search = 0; - sf->mode_pruning_based_on_two_pass_partition_search = 0; - sf->use_intra_txb_hash = 0; - sf->use_inter_txb_hash = 1; - sf->use_mb_rd_hash = 1; - sf->optimize_b_precheck = 0; - sf->jnt_comp_fast_tx_search = 0; - sf->jnt_comp_skip_mv_search = 0; - sf->reuse_inter_intra_mode = 0; - - for (i = 0; i < TX_SIZES; i++) { - sf->intra_y_mode_mask[i] = INTRA_ALL; - sf->intra_uv_mode_mask[i] = UV_INTRA_ALL; - } - sf->lpf_pick = LPF_PICK_FROM_FULL_IMAGE; - sf->use_fast_coef_costing = 0; - sf->max_intra_bsize = BLOCK_LARGEST; - // This setting only takes effect when partition_search_type is set - // to FIXED_PARTITION. - sf->always_this_block_size = BLOCK_16X16; - // Recode loop tolerance %. - sf->recode_tolerance = 25; - sf->partition_search_breakout_dist_thr = 0; - sf->partition_search_breakout_rate_thr = 0; - sf->simple_model_rd_from_var = 0; - sf->prune_ext_partition_types_search_level = 0; - sf->ml_prune_rect_partition = 0; - sf->ml_prune_ab_partition = 0; - sf->ml_prune_4_partition = 0; - sf->fast_cdef_search = 0; - for (i = 0; i < PARTITION_BLOCK_SIZES; ++i) - sf->ml_partition_search_breakout_thresh[i] = -1; // -1 means not enabled. 
- - // Set this at the appropriate speed levels - sf->use_transform_domain_distortion = 0; - sf->gm_search_type = GM_FULL_SEARCH; - sf->gm_disable_recode = 0; - sf->use_fast_interpolation_filter_search = 0; - sf->skip_repeat_interpolation_filter_search = 0; - sf->use_hash_based_trellis = 0; - sf->prune_comp_search_by_single_result = 0; - sf->skip_repeated_newmv = 0; - - // Set decoder side speed feature to use less dual sgr modes - sf->dual_sgr_penalty_level = 0; - - sf->inter_mode_rd_model_estimation = 0; - sf->obmc_full_pixel_search_level = 0; - - if (oxcf->mode == GOOD) - set_good_speed_features_framesize_independent(cpi, sf, oxcf->speed); - - // sf->partition_search_breakout_dist_thr is set assuming max 64x64 - // blocks. Normalise this if the blocks are bigger. - if (MAX_SB_SIZE_LOG2 > 6) { - sf->partition_search_breakout_dist_thr <<= 2 * (MAX_SB_SIZE_LOG2 - 6); - } - - cpi->diamond_search_sad = av1_diamond_search_sad; - - sf->allow_exhaustive_searches = 1; - int speed = (oxcf->speed > MAX_MESH_SPEED) ? 
MAX_MESH_SPEED : oxcf->speed; - if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) - sf->exhaustive_searches_thresh = (1 << 24); - else - sf->exhaustive_searches_thresh = (1 << 25); - sf->max_exaustive_pct = good_quality_max_mesh_pct[speed]; - if (speed > 0) - sf->exhaustive_searches_thresh = sf->exhaustive_searches_thresh << 1; - - for (i = 0; i < MAX_MESH_STEP; ++i) { - sf->mesh_patterns[i].range = good_quality_mesh_patterns[speed][i].range; - sf->mesh_patterns[i].interval = - good_quality_mesh_patterns[speed][i].interval; - } - if ((frame_is_intra_only(cm) && cm->allow_screen_content_tools) && - (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION || - cpi->oxcf.content == AOM_CONTENT_SCREEN)) { - for (i = 0; i < MAX_MESH_STEP; ++i) { - sf->mesh_patterns[i].range = intrabc_mesh_patterns[speed][i].range; - sf->mesh_patterns[i].interval = intrabc_mesh_patterns[speed][i].interval; - } - sf->max_exaustive_pct = intrabc_max_mesh_pct[speed]; - } - - // Slow quant, dct and trellis not worthwhile for first pass - // so make sure they are always turned off. - if (oxcf->pass == 1) sf->optimize_coefficients = NO_TRELLIS_OPT; - - // No recode for 1 pass. - if (oxcf->pass == 0) { - sf->recode_loop = DISALLOW_RECODE; - sf->optimize_coefficients = NO_TRELLIS_OPT; - } - - if (sf->mv.subpel_search_method == SUBPEL_TREE) { - cpi->find_fractional_mv_step = av1_find_best_sub_pixel_tree; - } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED) { - cpi->find_fractional_mv_step = av1_find_best_sub_pixel_tree_pruned; - } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED_MORE) { - cpi->find_fractional_mv_step = av1_find_best_sub_pixel_tree_pruned_more; - } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED_EVENMORE) { - cpi->find_fractional_mv_step = av1_find_best_sub_pixel_tree_pruned_evenmore; - } - - cpi->optimize_speed_feature = - oxcf->pass != 1 ? 
sf->optimize_coefficients : NO_TRELLIS_OPT; - // FIXME: trellis not very efficient for quantisation matrices - if (cm->using_qmatrix) cpi->optimize_speed_feature = NO_TRELLIS_OPT; - if (oxcf->disable_trellis_quant) cpi->optimize_speed_feature = NO_TRELLIS_OPT; - - x->min_partition_size = sf->default_min_partition_size; - x->max_partition_size = sf->default_max_partition_size; - - // This is only used in motion vector unit test. - if (cpi->oxcf.motion_vector_unit_test == 1) - cpi->find_fractional_mv_step = av1_return_max_sub_pixel_mv; - else if (cpi->oxcf.motion_vector_unit_test == 2) - cpi->find_fractional_mv_step = av1_return_min_sub_pixel_mv; - -#if CONFIG_DIST_8X8 - if (sf->use_transform_domain_distortion > 0) cpi->oxcf.using_dist_8x8 = 0; - - if (cpi->oxcf.using_dist_8x8) x->min_partition_size = BLOCK_8X8; -#endif // CONFIG_DIST_8X8 -} diff --git a/third_party/aom/av1/encoder/speed_features.h b/third_party/aom/av1/encoder/speed_features.h deleted file mode 100644 index 41013b2e7..000000000 --- a/third_party/aom/av1/encoder/speed_features.h +++ /dev/null @@ -1,568 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_ENCODER_SPEED_FEATURES_H_ -#define AOM_AV1_ENCODER_SPEED_FEATURES_H_ - -#include "av1/common/enums.h" - -#ifdef __cplusplus -extern "C" { -#endif - -enum { - INTRA_ALL = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED) | (1 << D45_PRED) | - (1 << D135_PRED) | (1 << D113_PRED) | (1 << D157_PRED) | - (1 << D203_PRED) | (1 << D67_PRED) | (1 << SMOOTH_PRED) | - (1 << SMOOTH_V_PRED) | (1 << SMOOTH_H_PRED) | (1 << PAETH_PRED), - UV_INTRA_ALL = - (1 << UV_DC_PRED) | (1 << UV_V_PRED) | (1 << UV_H_PRED) | - (1 << UV_D45_PRED) | (1 << UV_D135_PRED) | (1 << UV_D113_PRED) | - (1 << UV_D157_PRED) | (1 << UV_D203_PRED) | (1 << UV_D67_PRED) | - (1 << UV_SMOOTH_PRED) | (1 << UV_SMOOTH_V_PRED) | - (1 << UV_SMOOTH_H_PRED) | (1 << UV_PAETH_PRED) | (1 << UV_CFL_PRED), - UV_INTRA_DC = (1 << UV_DC_PRED), - UV_INTRA_DC_CFL = (1 << UV_DC_PRED) | (1 << UV_CFL_PRED), - UV_INTRA_DC_TM = (1 << UV_DC_PRED) | (1 << UV_PAETH_PRED), - UV_INTRA_DC_PAETH_CFL = - (1 << UV_DC_PRED) | (1 << UV_PAETH_PRED) | (1 << UV_CFL_PRED), - UV_INTRA_DC_H_V = (1 << UV_DC_PRED) | (1 << UV_V_PRED) | (1 << UV_H_PRED), - UV_INTRA_DC_H_V_CFL = (1 << UV_DC_PRED) | (1 << UV_V_PRED) | - (1 << UV_H_PRED) | (1 << UV_CFL_PRED), - UV_INTRA_DC_PAETH_H_V = (1 << UV_DC_PRED) | (1 << UV_PAETH_PRED) | - (1 << UV_V_PRED) | (1 << UV_H_PRED), - UV_INTRA_DC_PAETH_H_V_CFL = (1 << UV_DC_PRED) | (1 << UV_PAETH_PRED) | - (1 << UV_V_PRED) | (1 << UV_H_PRED) | - (1 << UV_CFL_PRED), - INTRA_DC = (1 << DC_PRED), - INTRA_DC_TM = (1 << DC_PRED) | (1 << PAETH_PRED), - INTRA_DC_H_V = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED), - INTRA_DC_PAETH_H_V = - (1 << DC_PRED) | (1 << PAETH_PRED) | (1 << V_PRED) | (1 << H_PRED) -}; - -enum { - INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << GLOBALMV) | - (1 << NEWMV) | (1 << NEAREST_NEARESTMV) | (1 << NEAR_NEARMV) | - (1 << NEW_NEWMV) | (1 << NEAREST_NEWMV) | (1 << NEAR_NEWMV) | - (1 << NEW_NEARMV) | (1 << NEW_NEARESTMV) | (1 << GLOBAL_GLOBALMV), - INTER_NEAREST_NEAR_ZERO = (1 
<< NEARESTMV) | (1 << NEARMV) | (1 << GLOBALMV) | - (1 << NEAREST_NEARESTMV) | (1 << GLOBAL_GLOBALMV) | - (1 << NEAREST_NEWMV) | (1 << NEW_NEARESTMV) | - (1 << NEW_NEARMV) | (1 << NEAR_NEWMV) | - (1 << NEAR_NEARMV), -}; - -enum { - DISABLE_ALL_INTER_SPLIT = (1 << THR_COMP_GA) | (1 << THR_COMP_LA) | - (1 << THR_ALTR) | (1 << THR_GOLD) | (1 << THR_LAST), - - DISABLE_ALL_SPLIT = (1 << THR_INTRA) | DISABLE_ALL_INTER_SPLIT, - - DISABLE_COMPOUND_SPLIT = (1 << THR_COMP_GA) | (1 << THR_COMP_LA), - - LAST_AND_INTRA_SPLIT_ONLY = (1 << THR_COMP_GA) | (1 << THR_COMP_LA) | - (1 << THR_ALTR) | (1 << THR_GOLD) -}; - -typedef enum { - TXFM_CODING_SF = 1, - INTER_PRED_SF = 2, - INTRA_PRED_SF = 4, - PARTITION_SF = 8, - LOOP_FILTER_SF = 16, - RD_SKIP_SF = 32, - RESERVE_2_SF = 64, - RESERVE_3_SF = 128, -} DEV_SPEED_FEATURES; - -typedef enum { - DIAMOND = 0, - NSTEP = 1, - HEX = 2, - BIGDIA = 3, - SQUARE = 4, - FAST_HEX = 5, - FAST_DIAMOND = 6 -} SEARCH_METHODS; - -typedef enum { - // No recode. - DISALLOW_RECODE = 0, - // Allow recode for KF and exceeding maximum frame bandwidth. - ALLOW_RECODE_KFMAXBW = 1, - // Allow recode only for KF/ARF/GF frames. - ALLOW_RECODE_KFARFGF = 2, - // Allow recode for all frames based on bitrate constraints. - ALLOW_RECODE = 3, -} RECODE_LOOP_TYPE; - -typedef enum { - SUBPEL_TREE = 0, - SUBPEL_TREE_PRUNED = 1, // Prunes 1/2-pel searches - SUBPEL_TREE_PRUNED_MORE = 2, // Prunes 1/2-pel searches more aggressively - SUBPEL_TREE_PRUNED_EVENMORE = 3, // Prunes 1/2- and 1/4-pel searches - // Other methods to come -} SUBPEL_SEARCH_METHODS; - -typedef enum { - USE_FULL_RD = 0, - USE_FAST_RD, - USE_LARGESTALL, -} TX_SIZE_SEARCH_METHOD; - -typedef enum { - NOT_IN_USE = 0, - RELAXED_NEIGHBORING_MIN_MAX = 1 -} AUTO_MIN_MAX_MODE; - -typedef enum { - // Try the full image with different values. - LPF_PICK_FROM_FULL_IMAGE, - // Try a small portion of the image with different values. 
- LPF_PICK_FROM_SUBIMAGE, - // Estimate the level based on quantizer and frame type - LPF_PICK_FROM_Q, - // Pick 0 to disable LPF if LPF was enabled last frame - LPF_PICK_MINIMAL_LPF -} LPF_PICK_METHOD; - -typedef enum { - // Terminate search early based on distortion so far compared to - // qp step, distortion in the neighborhood of the frame, etc. - FLAG_EARLY_TERMINATE = 1 << 0, - - // Skips comp inter modes if the best so far is an intra mode. - FLAG_SKIP_COMP_BESTINTRA = 1 << 1, - - // Skips oblique intra modes if the best so far is an inter mode. - FLAG_SKIP_INTRA_BESTINTER = 1 << 3, - - // Skips oblique intra modes at angles 27, 63, 117, 153 if the best - // intra so far is not one of the neighboring directions. - FLAG_SKIP_INTRA_DIRMISMATCH = 1 << 4, - - // Skips intra modes other than DC_PRED if the source variance is small - FLAG_SKIP_INTRA_LOWVAR = 1 << 5, -} MODE_SEARCH_SKIP_LOGIC; - -typedef enum { - NO_PRUNE = 0, - // eliminates one tx type in vertical and horizontal direction - PRUNE_ONE = 1, - // eliminates two tx types in each direction - PRUNE_TWO = 2, - // adaptively prunes the least perspective tx types out of all 16 - // (tuned to provide negligible quality loss) - PRUNE_2D_ACCURATE = 3, - // similar, but applies much more aggressive pruning to get better speed-up - PRUNE_2D_FAST = 4, -} TX_TYPE_PRUNE_MODE; - -typedef struct { - TX_TYPE_PRUNE_MODE prune_mode; - int fast_intra_tx_type_search; - int fast_inter_tx_type_search; - - // Use a skip flag prediction model to detect blocks with skip = 1 early - // and avoid doing full TX type search for such blocks. - int use_skip_flag_prediction; - - // Threshold used by the ML based method to predict TX block split decisions. 
- int ml_tx_split_thresh; - - // skip remaining transform type search when we found the rdcost of skip is - // better than applying transform - int skip_tx_search; -} TX_TYPE_SEARCH; - -typedef enum { - // Search partitions using RD criterion - SEARCH_PARTITION, - - // Always use a fixed size partition - FIXED_PARTITION, - - REFERENCE_PARTITION -} PARTITION_SEARCH_TYPE; - -typedef struct MV_SPEED_FEATURES { - // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc). - SEARCH_METHODS search_method; - - // This parameter controls which step in the n-step process we start at. - // It's changed adaptively based on circumstances. - int reduce_first_step_size; - - // If this is set to 1, we limit the motion search range to 2 times the - // largest motion vector found in the last frame. - int auto_mv_step_size; - - // Subpel_search_method can only be subpel_tree which does a subpixel - // logarithmic search that keeps stepping at 1/2 pixel units until - // you stop getting a gain, and then goes on to 1/4 and repeats - // the same process. Along the way it skips many diagonals. - SUBPEL_SEARCH_METHODS subpel_search_method; - - // Maximum number of steps in logarithmic subpel search before giving up. 
- int subpel_iters_per_step; - - // Control when to stop subpel search - int subpel_force_stop; -} MV_SPEED_FEATURES; - -#define MAX_MESH_STEP 4 - -typedef struct MESH_PATTERN { - int range; - int interval; -} MESH_PATTERN; - -typedef enum { - GM_FULL_SEARCH, - GM_REDUCED_REF_SEARCH, - GM_DISABLE_SEARCH -} GM_SEARCH_TYPE; - -typedef enum { - GM_ERRORADV_TR_0, - GM_ERRORADV_TR_1, - GM_ERRORADV_TR_2, - GM_ERRORADV_TR_TYPES, -} GM_ERRORADV_TYPE; - -typedef enum { - NO_TRELLIS_OPT, // No trellis optimization - FULL_TRELLIS_OPT, // Trellis optimization in all stages - FINAL_PASS_TRELLIS_OPT // Trellis optimization in only the final encode pass -} TRELLIS_OPT_TYPE; - -typedef enum { - FULL_TXFM_RD, - LOW_TXFM_RD, -} TXFM_RD_MODEL; - -typedef struct SPEED_FEATURES { - MV_SPEED_FEATURES mv; - - // Frame level coding parameter update - int frame_parameter_update; - - RECODE_LOOP_TYPE recode_loop; - - // Trellis (dynamic programming) optimization of quantized values - TRELLIS_OPT_TYPE optimize_coefficients; - - // Global motion warp error threshold - GM_ERRORADV_TYPE gm_erroradv_type; - - // Always set to 0. If on it enables 0 cost background transmission - // (except for the initial transmission of the segmentation). The feature is - // disabled because the addition of very large block sizes make the - // backgrounds very to cheap to encode, and the segmentation we have - // adds overhead. - int static_segmentation; - - // Limit the inter mode tested in the RD loop - int reduce_inter_modes; - - // Do not compute the global motion parameters for a LAST2_FRAME or - // LAST3_FRAME if the GOLDEN_FRAME is closer and it has a non identity - // global model. - int selective_ref_gm; - - // If 1 we iterate finding a best reference for 2 ref frames together - via - // a log search that iterates 4 times (check around mv for last for best - // error of combined predictor then check around mv for alt). If 0 we - // we just use the best motion vector found for each frame by itself. 
- BLOCK_SIZE comp_inter_joint_search_thresh; - - // This variable is used to cap the maximum number of times we skip testing a - // mode to be evaluated. A high value means we will be faster. - int adaptive_rd_thresh; - - // Determine which method we use to determine transform size. We can choose - // between options like full rd, largest for prediction size, largest - // for intra and model coefs for the rest. - TX_SIZE_SEARCH_METHOD tx_size_search_method; - - // Init search depth for square and rectangular transform partitions. - // Values: - // 0 - search full tree, 1: search 1 level, 2: search the highest level only - int inter_tx_size_search_init_depth_sqr; - int inter_tx_size_search_init_depth_rect; - int intra_tx_size_search_init_depth_sqr; - int intra_tx_size_search_init_depth_rect; - // If any dimension of a coding block size above 64, always search the - // largest transform only, since the largest transform block size is 64x64. - int tx_size_search_lgr_block; - - PARTITION_SEARCH_TYPE partition_search_type; - - TX_TYPE_SEARCH tx_type_search; - - // Skip split transform block partition when the collocated bigger block - // is selected as all zero coefficients. - int txb_split_cap; - - // Shortcut the transform block partition and type search when the target - // rdcost is relatively lower. - // Values are 0 (not used) , or 1 - 2 with progressively increasing - // aggressiveness - int adaptive_txb_search_level; - - // Prune level for tx_size_type search for inter based on rd model - // 0: no pruning - // 1-2: progressively increasing aggressiveness of pruning - int model_based_prune_tx_search_level; - - // Model based breakout after interpolation filter search - // 0: no breakout - // 1: use model based rd breakout - int model_based_post_interp_filter_breakout; - - // Used if partition_search_type = FIXED_SIZE_PARTITION - BLOCK_SIZE always_this_block_size; - - // Drop less likely to be picked reference frames in the RD search. 
- // Has three levels for now: 0, 1 and 2, where higher levels prune more - // aggressively than lower ones. (0 means no pruning). - int selective_ref_frame; - - // Prune extended partition types search - // Can take values 0 - 2, 0 referring to no pruning, and 1 - 2 increasing - // aggressiveness of pruning in order. - int prune_ext_partition_types_search_level; - - // Use a ML model to prune horz and vert partitions - int ml_prune_rect_partition; - - // Use a ML model to prune horz_a, horz_b, vert_a and vert_b partitions. - int ml_prune_ab_partition; - - // Use a ML model to prune horz4 and vert4 partitions. - int ml_prune_4_partition; - - int fast_cdef_search; - - // 2-pass coding block partition search - int two_pass_partition_search; - - // Use the mode decisions made in the initial partition search to prune mode - // candidates, e.g. ref frames. - int mode_pruning_based_on_two_pass_partition_search; - - // Skip rectangular partition test when partition type none gives better - // rd than partition type split. Can take values 0 - 2, 0 referring to no - // skipping, and 1 - 2 increasing aggressiveness of skipping in order. - int less_rectangular_check_level; - - // Use square partition only beyond this block size. - BLOCK_SIZE use_square_partition_only_threshold; - - // Prune reference frames for rectangular partitions. - int prune_ref_frame_for_rect_partitions; - - // Sets min and max partition sizes for this superblock based on the - // same superblock in last encoded frame, and the left and above neighbor. - AUTO_MIN_MAX_MODE auto_min_max_partition_size; - // Ensures the rd based auto partition search will always - // go down at least to the specified level. - BLOCK_SIZE rd_auto_partition_min_limit; - - // Min and max partition size we enable (block_size) as per auto - // min max, but also used by adjust partitioning, and pick_partitioning. 
- BLOCK_SIZE default_min_partition_size; - BLOCK_SIZE default_max_partition_size; - - // Whether or not we allow partitions one smaller or one greater than the last - // frame's partitioning. Only used if use_lastframe_partitioning is set. - int adjust_partitioning_from_last_frame; - - // Disables sub 8x8 blocksizes in different scenarios: Choices are to disable - // it always, to allow it for only Last frame and Intra, disable it for all - // inter modes or to enable it always. - int disable_split_mask; - - // TODO(jingning): combine the related motion search speed features - // This allows us to use motion search at other sizes as a starting - // point for this motion search and limits the search range around it. - int adaptive_motion_search; - - // Flag for allowing some use of exhaustive searches; - int allow_exhaustive_searches; - - // Threshold for allowing exhaistive motion search. - int exhaustive_searches_thresh; - - // Maximum number of exhaustive searches for a frame. - int max_exaustive_pct; - - // Pattern to be used for any exhaustive mesh searches. - MESH_PATTERN mesh_patterns[MAX_MESH_STEP]; - - // Allows sub 8x8 modes to use the prediction filter that was determined - // best for 8x8 mode. If set to 0 we always re check all the filters for - // sizes less than 8x8, 1 means we check all filter modes if no 8x8 filter - // was selected, and 2 means we use 8 tap if no 8x8 filter mode was selected. 
- int adaptive_pred_interp_filter; - - // Adaptive prediction mode search - int adaptive_mode_search; - - int cb_partition_search; - - int alt_ref_search_fp; - - // Implements various heuristics to skip searching modes - // The heuristics selected are based on flags - // defined in the MODE_SEARCH_SKIP_HEURISTICS enum - unsigned int mode_search_skip_flags; - - // A source variance threshold below which filter search is disabled - // Choose a very large value (UINT_MAX) to use 8-tap always - unsigned int disable_filter_search_var_thresh; - - // A source variance threshold below which wedge search is disabled - unsigned int disable_wedge_search_var_thresh; - - // Whether fast wedge sign estimate is used - int fast_wedge_sign_estimate; - - // These bit masks allow you to enable or disable intra modes for each - // transform size separately. - int intra_y_mode_mask[TX_SIZES]; - int intra_uv_mode_mask[TX_SIZES]; - - // This feature controls how the loop filter level is determined. - LPF_PICK_METHOD lpf_pick; - - // This feature controls whether we do the expensive context update and - // calculation in the rd coefficient costing loop. - int use_fast_coef_costing; - - // This feature controls the tolerence vs target used in deciding whether to - // recode a frame. It has no meaning if recode is disabled. - int recode_tolerance; - - // This variable controls the maximum block size where intra blocks can be - // used in inter frames. - // TODO(aconverse): Fold this into one of the other many mode skips - BLOCK_SIZE max_intra_bsize; - - // Partition search early breakout thresholds. - int64_t partition_search_breakout_dist_thr; - int partition_search_breakout_rate_thr; - - // Thresholds for ML based partition search breakout. 
- int ml_partition_search_breakout_thresh[PARTITION_BLOCK_SIZES]; - - // Allow skipping partition search for still image frame - int allow_partition_search_skip; - - // Fast approximation of av1_model_rd_from_var_lapndz - int simple_model_rd_from_var; - - // If true, sub-pixel search uses the exact convolve function used for final - // encoding and decoding; otherwise, it uses bilinear interpolation. - int use_accurate_subpel_search; - - // Whether to compute distortion in the image domain (slower but - // more accurate), or in the transform domain (faster but less acurate). - // 0: use image domain - // 1: use transform domain in tx_type search, and use image domain for - // RD_STATS - // 2: use transform domain - int use_transform_domain_distortion; - - GM_SEARCH_TYPE gm_search_type; - - // whether to disable the global motion recode loop - int gm_disable_recode; - - // Do limited interpolation filter search for dual filters, since best choice - // usually includes EIGHTTAP_REGULAR. - int use_fast_interpolation_filter_search; - - // Save results of interpolation_filter_search for a block - // Check mv and ref_frames before search, if they are same with previous - // saved results, it can be skipped. - int skip_repeat_interpolation_filter_search; - - // Use a hash table to store previously computed optimized qcoeffs from - // expensive calls to optimize_txb. - int use_hash_based_trellis; - - // flag to drop some ref frames in compound motion search - int drop_ref; - - // flag to allow skipping intra mode for inter frame prediction - int skip_intra_in_interframe; - - // Use hash table to store intra(keyframe only) txb transform search results - // to avoid repeated search on the same residue signal. - int use_intra_txb_hash; - - // Use hash table to store inter txb transform search results - // to avoid repeated search on the same residue signal. 
- int use_inter_txb_hash; - - // Use hash table to store macroblock RD search results - // to avoid repeated search on the same residue signal. - int use_mb_rd_hash; - - // Calculate RD cost before doing optimize_b, and skip if the cost is large. - int optimize_b_precheck; - - // Use model rd instead of transform search in jnt_comp - int jnt_comp_fast_tx_search; - - // Skip mv search in jnt_comp - int jnt_comp_skip_mv_search; - - // Decoder side speed feature to add penalty for use of dual-sgr filters. - // Takes values 0 - 10, 0 indicating no penalty and each additional level - // adding a penalty of 1% - int dual_sgr_penalty_level; - - // Dynamically estimate final rd from prediction error and mode cost - int inter_mode_rd_model_estimation; - - // Skip some ref frames in compound motion search by single motion search - // result. Has three levels for now: 0 referring to no skipping, and 1 - 3 - // increasing aggressiveness of skipping in order. - // Note: The search order might affect the result. It is better to search same - // single inter mode as a group. 
- int prune_comp_search_by_single_result; - - // Reuse the inter_intra_mode search result from NEARESTMV mode to other - // single ref modes - int reuse_inter_intra_mode; - - // Set the full pixel search level of obmc - // 0: obmc_full_pixel_diamond - // 1: obmc_refining_search_sad (faster) - int obmc_full_pixel_search_level; - - // flag to skip NEWMV mode in drl if the motion search result is the same - int skip_repeated_newmv; -} SPEED_FEATURES; - -struct AV1_COMP; - -void av1_set_speed_features_framesize_independent(struct AV1_COMP *cpi); -void av1_set_speed_features_framesize_dependent(struct AV1_COMP *cpi); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_SPEED_FEATURES_H_ diff --git a/third_party/aom/av1/encoder/temporal_filter.c b/third_party/aom/av1/encoder/temporal_filter.c deleted file mode 100644 index 75fdf02a5..000000000 --- a/third_party/aom/av1/encoder/temporal_filter.c +++ /dev/null @@ -1,602 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include -#include - -#include "config/aom_config.h" - -#include "av1/common/alloccommon.h" -#include "av1/common/onyxc_int.h" -#include "av1/common/quant_common.h" -#include "av1/common/reconinter.h" -#include "av1/common/odintrin.h" -#include "av1/encoder/av1_quantize.h" -#include "av1/encoder/extend.h" -#include "av1/encoder/firstpass.h" -#include "av1/encoder/mcomp.h" -#include "av1/encoder/encoder.h" -#include "av1/encoder/ratectrl.h" -#include "av1/encoder/reconinter_enc.h" -#include "av1/encoder/segmentation.h" -#include "av1/encoder/temporal_filter.h" -#include "aom_dsp/aom_dsp_common.h" -#include "aom_mem/aom_mem.h" -#include "aom_ports/mem.h" -#include "aom_ports/aom_timer.h" -#include "aom_scale/aom_scale.h" - -static void temporal_filter_predictors_mb_c( - MACROBLOCKD *xd, uint8_t *y_mb_ptr, uint8_t *u_mb_ptr, uint8_t *v_mb_ptr, - int stride, int uv_block_width, int uv_block_height, int mv_row, int mv_col, - uint8_t *pred, struct scale_factors *scale, int x, int y, - int can_use_previous, int num_planes) { - const MV mv = { mv_row, mv_col }; - enum mv_precision mv_precision_uv; - int uv_stride; - // TODO(angiebird): change plane setting accordingly - ConvolveParams conv_params = get_conv_params(0, 0, xd->bd); - const InterpFilters interp_filters = xd->mi[0]->interp_filters; - WarpTypesAllowed warp_types; - memset(&warp_types, 0, sizeof(WarpTypesAllowed)); - - if (uv_block_width == 8) { - uv_stride = (stride + 1) >> 1; - mv_precision_uv = MV_PRECISION_Q4; - } else { - uv_stride = stride; - mv_precision_uv = MV_PRECISION_Q3; - } - av1_build_inter_predictor(y_mb_ptr, stride, &pred[0], 16, &mv, scale, 16, 16, - &conv_params, interp_filters, &warp_types, x, y, 0, - 0, MV_PRECISION_Q3, x, y, xd, can_use_previous); - - if (num_planes > 1) { - av1_build_inter_predictor( - u_mb_ptr, uv_stride, &pred[256], uv_block_width, &mv, scale, - uv_block_width, uv_block_height, &conv_params, interp_filters, - &warp_types, x, y, 1, 0, mv_precision_uv, x, y, xd, 
can_use_previous); - - av1_build_inter_predictor( - v_mb_ptr, uv_stride, &pred[512], uv_block_width, &mv, scale, - uv_block_width, uv_block_height, &conv_params, interp_filters, - &warp_types, x, y, 2, 0, mv_precision_uv, x, y, xd, can_use_previous); - } -} - -void av1_temporal_filter_apply_c(uint8_t *frame1, unsigned int stride, - uint8_t *frame2, unsigned int block_width, - unsigned int block_height, int strength, - int filter_weight, unsigned int *accumulator, - uint16_t *count) { - unsigned int i, j, k; - int modifier; - int byte = 0; - const int rounding = strength > 0 ? 1 << (strength - 1) : 0; - - for (i = 0, k = 0; i < block_height; i++) { - for (j = 0; j < block_width; j++, k++) { - int pixel_value = *frame2; - - // non-local mean approach - int diff_sse[9] = { 0 }; - int idx, idy, index = 0; - - for (idy = -1; idy <= 1; ++idy) { - for (idx = -1; idx <= 1; ++idx) { - int row = (int)i + idy; - int col = (int)j + idx; - - if (row >= 0 && row < (int)block_height && col >= 0 && - col < (int)block_width) { - int diff = frame1[byte + idy * (int)stride + idx] - - frame2[idy * (int)block_width + idx]; - diff_sse[index] = diff * diff; - ++index; - } - } - } - - assert(index > 0); - - modifier = 0; - for (idx = 0; idx < 9; ++idx) modifier += diff_sse[idx]; - - modifier *= 3; - modifier /= index; - - ++frame2; - - modifier += rounding; - modifier >>= strength; - - if (modifier > 16) modifier = 16; - - modifier = 16 - modifier; - modifier *= filter_weight; - - count[k] += modifier; - accumulator[k] += modifier * pixel_value; - - byte++; - } - - byte += stride - block_width; - } -} - -void av1_highbd_temporal_filter_apply_c( - uint8_t *frame1_8, unsigned int stride, uint8_t *frame2_8, - unsigned int block_width, unsigned int block_height, int strength, - int filter_weight, unsigned int *accumulator, uint16_t *count) { - uint16_t *frame1 = CONVERT_TO_SHORTPTR(frame1_8); - uint16_t *frame2 = CONVERT_TO_SHORTPTR(frame2_8); - unsigned int i, j, k; - int modifier; - int 
byte = 0; - const int rounding = strength > 0 ? 1 << (strength - 1) : 0; - - for (i = 0, k = 0; i < block_height; i++) { - for (j = 0; j < block_width; j++, k++) { - int pixel_value = *frame2; - - // non-local mean approach - int diff_sse[9] = { 0 }; - int idx, idy, index = 0; - - for (idy = -1; idy <= 1; ++idy) { - for (idx = -1; idx <= 1; ++idx) { - int row = (int)i + idy; - int col = (int)j + idx; - - if (row >= 0 && row < (int)block_height && col >= 0 && - col < (int)block_width) { - int diff = frame1[byte + idy * (int)stride + idx] - - frame2[idy * (int)block_width + idx]; - diff_sse[index] = diff * diff; - ++index; - } - } - } - - assert(index > 0); - - modifier = 0; - for (idx = 0; idx < 9; ++idx) modifier += diff_sse[idx]; - - modifier *= 3; - modifier /= index; - - ++frame2; - - modifier += rounding; - modifier >>= strength; - - if (modifier > 16) modifier = 16; - - modifier = 16 - modifier; - modifier *= filter_weight; - - count[k] += modifier; - accumulator[k] += modifier * pixel_value; - - byte++; - } - - byte += stride - block_width; - } -} - -static int temporal_filter_find_matching_mb_c(AV1_COMP *cpi, - uint8_t *arf_frame_buf, - uint8_t *frame_ptr_buf, - int stride, int x_pos, - int y_pos) { - MACROBLOCK *const x = &cpi->td.mb; - MACROBLOCKD *const xd = &x->e_mbd; - const MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv; - int step_param; - int sadpb = x->sadperbit16; - int bestsme = INT_MAX; - int distortion; - unsigned int sse; - int cost_list[5]; - MvLimits tmp_mv_limits = x->mv_limits; - - MV best_ref_mv1 = kZeroMv; - MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */ - - // Save input state - struct buf_2d src = x->plane[0].src; - struct buf_2d pre = xd->plane[0].pre[0]; - - best_ref_mv1_full.col = best_ref_mv1.col >> 3; - best_ref_mv1_full.row = best_ref_mv1.row >> 3; - - // Setup frame pointers - x->plane[0].src.buf = arf_frame_buf; - x->plane[0].src.stride = stride; - xd->plane[0].pre[0].buf = frame_ptr_buf; - xd->plane[0].pre[0].stride 
= stride; - - step_param = mv_sf->reduce_first_step_size; - step_param = AOMMIN(step_param, MAX_MVSEARCH_STEPS - 2); - - av1_set_mv_search_range(&x->mv_limits, &best_ref_mv1); - - x->mvcost = x->mv_cost_stack; - x->nmvjointcost = x->nmv_vec_cost; - - av1_full_pixel_search(cpi, x, BLOCK_16X16, &best_ref_mv1_full, step_param, - NSTEP, 1, sadpb, cond_cost_list(cpi, cost_list), - &best_ref_mv1, 0, 0, x_pos, y_pos, 0); - x->mv_limits = tmp_mv_limits; - - // Ignore mv costing by sending NULL pointer instead of cost array - if (cpi->common.cur_frame_force_integer_mv == 1) { - const uint8_t *const src_address = x->plane[0].src.buf; - const int src_stride = x->plane[0].src.stride; - const uint8_t *const y = xd->plane[0].pre[0].buf; - const int y_stride = xd->plane[0].pre[0].stride; - const int offset = x->best_mv.as_mv.row * y_stride + x->best_mv.as_mv.col; - - x->best_mv.as_mv.row *= 8; - x->best_mv.as_mv.col *= 8; - - bestsme = cpi->fn_ptr[BLOCK_16X16].vf(y + offset, y_stride, src_address, - src_stride, &sse); - } else { - bestsme = cpi->find_fractional_mv_step( - x, &cpi->common, 0, 0, &best_ref_mv1, - cpi->common.allow_high_precision_mv, x->errorperbit, - &cpi->fn_ptr[BLOCK_16X16], 0, mv_sf->subpel_iters_per_step, - cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, - NULL, 0, 0, 0, 0, 0); - } - - x->e_mbd.mi[0]->mv[0] = x->best_mv; - - // Restore input state - x->plane[0].src = src; - xd->plane[0].pre[0] = pre; - - return bestsme; -} - -static void temporal_filter_iterate_c(AV1_COMP *cpi, - YV12_BUFFER_CONFIG **frames, - int frame_count, int alt_ref_index, - int strength, - struct scale_factors *scale) { - const AV1_COMMON *cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - int byte; - int frame; - int mb_col, mb_row; - unsigned int filter_weight; - int mb_cols = (frames[alt_ref_index]->y_crop_width + 15) >> 4; - int mb_rows = (frames[alt_ref_index]->y_crop_height + 15) >> 4; - int mb_y_offset = 0; - int mb_uv_offset = 0; - 
DECLARE_ALIGNED(16, unsigned int, accumulator[16 * 16 * 3]); - DECLARE_ALIGNED(16, uint16_t, count[16 * 16 * 3]); - MACROBLOCKD *mbd = &cpi->td.mb.e_mbd; - YV12_BUFFER_CONFIG *f = frames[alt_ref_index]; - uint8_t *dst1, *dst2; - DECLARE_ALIGNED(32, uint16_t, predictor16[16 * 16 * 3]); - DECLARE_ALIGNED(32, uint8_t, predictor8[16 * 16 * 3]); - uint8_t *predictor; - const int mb_uv_height = 16 >> mbd->plane[1].subsampling_y; - const int mb_uv_width = 16 >> mbd->plane[1].subsampling_x; - - // Save input state - uint8_t *input_buffer[MAX_MB_PLANE]; - int i; - if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - predictor = CONVERT_TO_BYTEPTR(predictor16); - } else { - predictor = predictor8; - } - - for (i = 0; i < num_planes; i++) input_buffer[i] = mbd->plane[i].pre[0].buf; - - for (mb_row = 0; mb_row < mb_rows; mb_row++) { - // Source frames are extended to 16 pixels. This is different than - // L/A/G reference frames that have a border of 32 (AV1ENCBORDERINPIXELS) - // A 6/8 tap filter is used for motion search. This requires 2 pixels - // before and 3 pixels after. So the largest Y mv on a border would - // then be 16 - AOM_INTERP_EXTEND. The UV blocks are half the size of the - // Y and therefore only extended by 8. The largest mv that a UV block - // can support is 8 - AOM_INTERP_EXTEND. A UV mv is half of a Y mv. - // (16 - AOM_INTERP_EXTEND) >> 1 which is greater than - // 8 - AOM_INTERP_EXTEND. - // To keep the mv in play for both Y and UV planes the max that it - // can be on a border is therefore 16 - (2*AOM_INTERP_EXTEND+1). 
- cpi->td.mb.mv_limits.row_min = - -((mb_row * 16) + (17 - 2 * AOM_INTERP_EXTEND)); - cpi->td.mb.mv_limits.row_max = - ((mb_rows - 1 - mb_row) * 16) + (17 - 2 * AOM_INTERP_EXTEND); - - for (mb_col = 0; mb_col < mb_cols; mb_col++) { - int j, k; - int stride; - - memset(accumulator, 0, 16 * 16 * 3 * sizeof(accumulator[0])); - memset(count, 0, 16 * 16 * 3 * sizeof(count[0])); - - cpi->td.mb.mv_limits.col_min = - -((mb_col * 16) + (17 - 2 * AOM_INTERP_EXTEND)); - cpi->td.mb.mv_limits.col_max = - ((mb_cols - 1 - mb_col) * 16) + (17 - 2 * AOM_INTERP_EXTEND); - - for (frame = 0; frame < frame_count; frame++) { - const int thresh_low = 10000; - const int thresh_high = 20000; - - if (frames[frame] == NULL) continue; - - mbd->mi[0]->mv[0].as_mv.row = 0; - mbd->mi[0]->mv[0].as_mv.col = 0; - mbd->mi[0]->motion_mode = SIMPLE_TRANSLATION; - - if (frame == alt_ref_index) { - filter_weight = 2; - } else { - // Find best match in this frame by MC - int err = temporal_filter_find_matching_mb_c( - cpi, frames[alt_ref_index]->y_buffer + mb_y_offset, - frames[frame]->y_buffer + mb_y_offset, frames[frame]->y_stride, - mb_col * 16, mb_row * 16); - - // Assign higher weight to matching MB if it's error - // score is lower. If not applying MC default behavior - // is to weight all MBs equal. - filter_weight = err < thresh_low ? 2 : err < thresh_high ? 
1 : 0; - } - - if (filter_weight != 0) { - // Construct the predictors - temporal_filter_predictors_mb_c( - mbd, frames[frame]->y_buffer + mb_y_offset, - frames[frame]->u_buffer + mb_uv_offset, - frames[frame]->v_buffer + mb_uv_offset, frames[frame]->y_stride, - mb_uv_width, mb_uv_height, mbd->mi[0]->mv[0].as_mv.row, - mbd->mi[0]->mv[0].as_mv.col, predictor, scale, mb_col * 16, - mb_row * 16, cm->allow_warped_motion, num_planes); - - // Apply the filter (YUV) - if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - int adj_strength = strength + 2 * (mbd->bd - 8); - av1_highbd_temporal_filter_apply( - f->y_buffer + mb_y_offset, f->y_stride, predictor, 16, 16, - adj_strength, filter_weight, accumulator, count); - if (num_planes > 1) { - av1_highbd_temporal_filter_apply( - f->u_buffer + mb_uv_offset, f->uv_stride, predictor + 256, - mb_uv_width, mb_uv_height, adj_strength, filter_weight, - accumulator + 256, count + 256); - av1_highbd_temporal_filter_apply( - f->v_buffer + mb_uv_offset, f->uv_stride, predictor + 512, - mb_uv_width, mb_uv_height, adj_strength, filter_weight, - accumulator + 512, count + 512); - } - } else { - av1_temporal_filter_apply_c(f->y_buffer + mb_y_offset, f->y_stride, - predictor, 16, 16, strength, - filter_weight, accumulator, count); - if (num_planes > 1) { - av1_temporal_filter_apply_c( - f->u_buffer + mb_uv_offset, f->uv_stride, predictor + 256, - mb_uv_width, mb_uv_height, strength, filter_weight, - accumulator + 256, count + 256); - av1_temporal_filter_apply_c( - f->v_buffer + mb_uv_offset, f->uv_stride, predictor + 512, - mb_uv_width, mb_uv_height, strength, filter_weight, - accumulator + 512, count + 512); - } - } - } - } - - // Normalize filter output to produce AltRef frame - if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { - uint16_t *dst1_16; - uint16_t *dst2_16; - dst1 = cpi->alt_ref_buffer.y_buffer; - dst1_16 = CONVERT_TO_SHORTPTR(dst1); - stride = cpi->alt_ref_buffer.y_stride; - byte = mb_y_offset; - for (i = 0, k = 0; i < 16; 
i++) { - for (j = 0; j < 16; j++, k++) { - dst1_16[byte] = - (uint16_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]); - - // move to next pixel - byte++; - } - - byte += stride - 16; - } - if (num_planes > 1) { - dst1 = cpi->alt_ref_buffer.u_buffer; - dst2 = cpi->alt_ref_buffer.v_buffer; - dst1_16 = CONVERT_TO_SHORTPTR(dst1); - dst2_16 = CONVERT_TO_SHORTPTR(dst2); - stride = cpi->alt_ref_buffer.uv_stride; - byte = mb_uv_offset; - for (i = 0, k = 256; i < mb_uv_height; i++) { - for (j = 0; j < mb_uv_width; j++, k++) { - int m = k + 256; - // U - dst1_16[byte] = - (uint16_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]); - // V - dst2_16[byte] = - (uint16_t)OD_DIVU(accumulator[m] + (count[m] >> 1), count[m]); - // move to next pixel - byte++; - } - byte += stride - mb_uv_width; - } - } - } else { - dst1 = cpi->alt_ref_buffer.y_buffer; - stride = cpi->alt_ref_buffer.y_stride; - byte = mb_y_offset; - for (i = 0, k = 0; i < 16; i++) { - for (j = 0; j < 16; j++, k++) { - dst1[byte] = - (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]); - - // move to next pixel - byte++; - } - byte += stride - 16; - } - if (num_planes > 1) { - dst1 = cpi->alt_ref_buffer.u_buffer; - dst2 = cpi->alt_ref_buffer.v_buffer; - stride = cpi->alt_ref_buffer.uv_stride; - byte = mb_uv_offset; - for (i = 0, k = 256; i < mb_uv_height; i++) { - for (j = 0; j < mb_uv_width; j++, k++) { - int m = k + 256; - // U - dst1[byte] = - (uint8_t)OD_DIVU(accumulator[k] + (count[k] >> 1), count[k]); - // V - dst2[byte] = - (uint8_t)OD_DIVU(accumulator[m] + (count[m] >> 1), count[m]); - // move to next pixel - byte++; - } - byte += stride - mb_uv_width; - } - } - } - mb_y_offset += 16; - mb_uv_offset += mb_uv_width; - } - mb_y_offset += 16 * (f->y_stride - mb_cols); - mb_uv_offset += mb_uv_height * f->uv_stride - mb_uv_width * mb_cols; - } - - // Restore input state - for (i = 0; i < num_planes; i++) mbd->plane[i].pre[0].buf = input_buffer[i]; -} - -// Apply buffer limits and context specific 
adjustments to arnr filter. -static void adjust_arnr_filter(AV1_COMP *cpi, int distance, int group_boost, - int *arnr_frames, int *arnr_strength) { - const AV1EncoderConfig *const oxcf = &cpi->oxcf; - const int frames_after_arf = - av1_lookahead_depth(cpi->lookahead) - distance - 1; - int frames_fwd = (cpi->oxcf.arnr_max_frames - 1) >> 1; - int frames_bwd; - int q, frames, strength; - - // Define the forward and backwards filter limits for this arnr group. - if (frames_fwd > frames_after_arf) frames_fwd = frames_after_arf; - if (frames_fwd > distance) frames_fwd = distance; - - frames_bwd = frames_fwd; - - // For even length filter there is one more frame backward - // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff. - if (frames_bwd < distance) frames_bwd += (oxcf->arnr_max_frames + 1) & 0x1; - - // Set the baseline active filter size. - frames = frames_bwd + 1 + frames_fwd; - - // Adjust the strength based on active max q. - if (cpi->common.current_video_frame > 1) - q = ((int)av1_convert_qindex_to_q(cpi->rc.avg_frame_qindex[INTER_FRAME], - cpi->common.seq_params.bit_depth)); - else - q = ((int)av1_convert_qindex_to_q(cpi->rc.avg_frame_qindex[KEY_FRAME], - cpi->common.seq_params.bit_depth)); - if (q > 16) { - strength = oxcf->arnr_strength; - } else { - strength = oxcf->arnr_strength - ((16 - q) / 2); - if (strength < 0) strength = 0; - } - - // Adjust number of frames in filter and strength based on gf boost level. 
- if (frames > group_boost / 150) { - frames = group_boost / 150; - frames += !(frames & 1); - } - - if (strength > group_boost / 300) { - strength = group_boost / 300; - } - - *arnr_frames = frames; - *arnr_strength = strength; -} - -void av1_temporal_filter(AV1_COMP *cpi, int distance) { - RATE_CONTROL *const rc = &cpi->rc; - int frame; - int frames_to_blur; - int start_frame; - int strength; - int frames_to_blur_backward; - int frames_to_blur_forward; - struct scale_factors sf; - YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS] = { NULL }; - const GF_GROUP *const gf_group = &cpi->twopass.gf_group; - - // Apply context specific adjustments to the arnr filter parameters. - adjust_arnr_filter(cpi, distance, rc->gfu_boost, &frames_to_blur, &strength); - // TODO(weitinglin): Currently, we enforce the filtering strength on - // extra ARFs' to be zeros. We should investigate in which - // case it is more beneficial to use non-zero strength - // filtering. - if (gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE) { - strength = 0; - frames_to_blur = 1; - } - - int which_arf = gf_group->arf_update_idx[gf_group->index]; - - // Set the temporal filtering status for the corresponding OVERLAY frame - if (strength == 0 && frames_to_blur == 1) - cpi->is_arf_filter_off[which_arf] = 1; - else - cpi->is_arf_filter_off[which_arf] = 0; - cpi->common.showable_frame = cpi->is_arf_filter_off[which_arf]; - - frames_to_blur_backward = (frames_to_blur / 2); - frames_to_blur_forward = ((frames_to_blur - 1) / 2); - start_frame = distance + frames_to_blur_forward; - - // Setup frame pointers, NULL indicates frame not included in filter. - for (frame = 0; frame < frames_to_blur; ++frame) { - const int which_buffer = start_frame - frame; - struct lookahead_entry *buf = - av1_lookahead_peek(cpi->lookahead, which_buffer); - frames[frames_to_blur - 1 - frame] = &buf->img; - } - - if (frames_to_blur > 0) { - // Setup scaling factors. Scaling on each of the arnr frames is not - // supported. 
- // ARF is produced at the native frame size and resized when coded. - av1_setup_scale_factors_for_frame( - &sf, frames[0]->y_crop_width, frames[0]->y_crop_height, - frames[0]->y_crop_width, frames[0]->y_crop_height); - } - - temporal_filter_iterate_c(cpi, frames, frames_to_blur, - frames_to_blur_backward, strength, &sf); -} diff --git a/third_party/aom/av1/encoder/temporal_filter.h b/third_party/aom/av1/encoder/temporal_filter.h deleted file mode 100644 index 2ddc68b2c..000000000 --- a/third_party/aom/av1/encoder/temporal_filter.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_ENCODER_TEMPORAL_FILTER_H_ -#define AOM_AV1_ENCODER_TEMPORAL_FILTER_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -void av1_temporal_filter(AV1_COMP *cpi, int distance); - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_TEMPORAL_FILTER_H_ diff --git a/third_party/aom/av1/encoder/tokenize.c b/third_party/aom/av1/encoder/tokenize.c deleted file mode 100644 index 16a6a9a35..000000000 --- a/third_party/aom/av1/encoder/tokenize.c +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include -#include -#include -#include - -#include "aom_mem/aom_mem.h" - -#include "av1/common/entropy.h" -#include "av1/common/pred_common.h" -#include "av1/common/scan.h" -#include "av1/common/seg_common.h" - -#include "av1/encoder/cost.h" -#include "av1/encoder/encoder.h" -#include "av1/encoder/encodetxb.h" -#include "av1/encoder/rdopt.h" -#include "av1/encoder/tokenize.h" - -static int cost_and_tokenize_map(Av1ColorMapParam *param, TOKENEXTRA **t, - int plane, int calc_rate, int allow_update_cdf, - FRAME_COUNTS *counts) { - const uint8_t *const color_map = param->color_map; - MapCdf map_cdf = param->map_cdf; - ColorCost color_cost = param->color_cost; - const int plane_block_width = param->plane_width; - const int rows = param->rows; - const int cols = param->cols; - const int n = param->n_colors; - const int palette_size_idx = n - PALETTE_MIN_SIZE; - int this_rate = 0; - uint8_t color_order[PALETTE_MAX_SIZE]; - - (void)plane; - (void)counts; - - for (int k = 1; k < rows + cols - 1; ++k) { - for (int j = AOMMIN(k, cols - 1); j >= AOMMAX(0, k - rows + 1); --j) { - int i = k - j; - int color_new_idx; - const int color_ctx = av1_get_palette_color_index_context( - color_map, plane_block_width, i, j, n, color_order, &color_new_idx); - assert(color_new_idx >= 0 && color_new_idx < n); - if (calc_rate) { - this_rate += (*color_cost)[palette_size_idx][color_ctx][color_new_idx]; - } else { - (*t)->token = color_new_idx; - (*t)->color_map_cdf = map_cdf[palette_size_idx][color_ctx]; - ++(*t); - if (allow_update_cdf) - update_cdf(map_cdf[palette_size_idx][color_ctx], color_new_idx, n); -#if CONFIG_ENTROPY_STATS - if (plane) { - ++counts->palette_uv_color_index[palette_size_idx][color_ctx] - [color_new_idx]; - } else { - ++counts->palette_y_color_index[palette_size_idx][color_ctx] - 
[color_new_idx]; - } -#endif - } - } - } - if (calc_rate) return this_rate; - return 0; -} - -static void get_palette_params(const MACROBLOCK *const x, int plane, - BLOCK_SIZE bsize, Av1ColorMapParam *params) { - const MACROBLOCKD *const xd = &x->e_mbd; - const MB_MODE_INFO *const mbmi = xd->mi[0]; - const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info; - params->color_map = xd->plane[plane].color_index_map; - params->map_cdf = plane ? xd->tile_ctx->palette_uv_color_index_cdf - : xd->tile_ctx->palette_y_color_index_cdf; - params->color_cost = - plane ? &x->palette_uv_color_cost : &x->palette_y_color_cost; - params->n_colors = pmi->palette_size[plane]; - av1_get_block_dimensions(bsize, plane, xd, &params->plane_width, NULL, - &params->rows, &params->cols); -} - -static void get_color_map_params(const MACROBLOCK *const x, int plane, - BLOCK_SIZE bsize, TX_SIZE tx_size, - COLOR_MAP_TYPE type, - Av1ColorMapParam *params) { - (void)tx_size; - memset(params, 0, sizeof(*params)); - switch (type) { - case PALETTE_MAP: get_palette_params(x, plane, bsize, params); break; - default: assert(0 && "Invalid color map type"); return; - } -} - -int av1_cost_color_map(const MACROBLOCK *const x, int plane, BLOCK_SIZE bsize, - TX_SIZE tx_size, COLOR_MAP_TYPE type) { - assert(plane == 0 || plane == 1); - Av1ColorMapParam color_map_params; - get_color_map_params(x, plane, bsize, tx_size, type, &color_map_params); - return cost_and_tokenize_map(&color_map_params, NULL, plane, 1, 0, NULL); -} - -void av1_tokenize_color_map(const MACROBLOCK *const x, int plane, - TOKENEXTRA **t, BLOCK_SIZE bsize, TX_SIZE tx_size, - COLOR_MAP_TYPE type, int allow_update_cdf, - FRAME_COUNTS *counts) { - assert(plane == 0 || plane == 1); - Av1ColorMapParam color_map_params; - get_color_map_params(x, plane, bsize, tx_size, type, &color_map_params); - // The first color index does not use context or entropy.
- (*t)->token = color_map_params.color_map[0]; - (*t)->color_map_cdf = NULL; - ++(*t); - cost_and_tokenize_map(&color_map_params, t, plane, 0, allow_update_cdf, - counts); -} - -void tokenize_vartx(ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run, - TX_SIZE tx_size, BLOCK_SIZE plane_bsize, int blk_row, - int blk_col, int block, int plane, void *arg) { - MACROBLOCK *const x = &td->mb; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - const struct macroblockd_plane *const pd = &xd->plane[plane]; - const int max_blocks_high = max_block_high(xd, plane_bsize, plane); - const int max_blocks_wide = max_block_wide(xd, plane_bsize, plane); - - if (blk_row >= max_blocks_high || blk_col >= max_blocks_wide) return; - - const TX_SIZE plane_tx_size = - plane ? av1_get_max_uv_txsize(mbmi->sb_type, pd->subsampling_x, - pd->subsampling_y) - : mbmi->inter_tx_size[av1_get_txb_size_index(plane_bsize, blk_row, - blk_col)]; - - if (tx_size == plane_tx_size || plane) { - plane_bsize = get_plane_block_size(mbmi->sb_type, pd->subsampling_x, - pd->subsampling_y); - if (!dry_run) { - av1_update_and_record_txb_context(plane, block, blk_row, blk_col, - plane_bsize, tx_size, arg); - } else if (dry_run == DRY_RUN_NORMAL) { - av1_update_txb_context_b(plane, block, blk_row, blk_col, plane_bsize, - tx_size, arg); - } else { - printf("DRY_RUN_COSTCOEFFS is not supported yet\n"); - assert(0); - } - } else { - // Half the block size in transform block unit. 
- const TX_SIZE sub_txs = sub_tx_size_map[tx_size]; - const int bsw = tx_size_wide_unit[sub_txs]; - const int bsh = tx_size_high_unit[sub_txs]; - const int step = bsw * bsh; - - assert(bsw > 0 && bsh > 0); - - for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) { - for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) { - const int offsetr = blk_row + row; - const int offsetc = blk_col + col; - - if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue; - - tokenize_vartx(td, t, dry_run, sub_txs, plane_bsize, offsetr, offsetc, - block, plane, arg); - block += step; - } - } - } -} - -void av1_tokenize_sb_vartx(const AV1_COMP *cpi, ThreadData *td, TOKENEXTRA **t, - RUN_TYPE dry_run, int mi_row, int mi_col, - BLOCK_SIZE bsize, int *rate, - uint8_t allow_update_cdf) { - const AV1_COMMON *const cm = &cpi->common; - const int num_planes = av1_num_planes(cm); - MACROBLOCK *const x = &td->mb; - MACROBLOCKD *const xd = &x->e_mbd; - MB_MODE_INFO *const mbmi = xd->mi[0]; - (void)t; - struct tokenize_b_args arg = { cpi, td, t, 0, allow_update_cdf }; - if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - - if (mbmi->skip) { - av1_reset_skip_context(xd, mi_row, mi_col, bsize, num_planes); - return; - } - - for (int plane = 0; plane < num_planes; ++plane) { - if (!is_chroma_reference(mi_row, mi_col, bsize, - xd->plane[plane].subsampling_x, - xd->plane[plane].subsampling_y)) { - continue; - } - const struct macroblockd_plane *const pd = &xd->plane[plane]; - const BLOCK_SIZE bsizec = - scale_chroma_bsize(bsize, pd->subsampling_x, pd->subsampling_y); - const BLOCK_SIZE plane_bsize = - get_plane_block_size(bsizec, pd->subsampling_x, pd->subsampling_y); - const int mi_width = block_size_wide[plane_bsize] >> tx_size_wide_log2[0]; - const int mi_height = block_size_high[plane_bsize] >> tx_size_high_log2[0]; - const TX_SIZE max_tx_size = get_vartx_max_txsize(xd, plane_bsize, plane); - const BLOCK_SIZE txb_size = txsize_to_bsize[max_tx_size]; - 
int bw = block_size_wide[txb_size] >> tx_size_wide_log2[0]; - int bh = block_size_high[txb_size] >> tx_size_high_log2[0]; - int idx, idy; - int block = 0; - int step = tx_size_wide_unit[max_tx_size] * tx_size_high_unit[max_tx_size]; - - const BLOCK_SIZE max_unit_bsize = - get_plane_block_size(BLOCK_64X64, pd->subsampling_x, pd->subsampling_y); - int mu_blocks_wide = - block_size_wide[max_unit_bsize] >> tx_size_wide_log2[0]; - int mu_blocks_high = - block_size_high[max_unit_bsize] >> tx_size_high_log2[0]; - - mu_blocks_wide = AOMMIN(mi_width, mu_blocks_wide); - mu_blocks_high = AOMMIN(mi_height, mu_blocks_high); - - for (idy = 0; idy < mi_height; idy += mu_blocks_high) { - for (idx = 0; idx < mi_width; idx += mu_blocks_wide) { - int blk_row, blk_col; - const int unit_height = AOMMIN(mu_blocks_high + idy, mi_height); - const int unit_width = AOMMIN(mu_blocks_wide + idx, mi_width); - for (blk_row = idy; blk_row < unit_height; blk_row += bh) { - for (blk_col = idx; blk_col < unit_width; blk_col += bw) { - tokenize_vartx(td, t, dry_run, max_tx_size, plane_bsize, blk_row, - blk_col, block, plane, &arg); - block += step; - } - } - } - } - } - if (rate) *rate += arg.this_rate; -} diff --git a/third_party/aom/av1/encoder/tokenize.h b/third_party/aom/av1/encoder/tokenize.h deleted file mode 100644 index 63b505f36..000000000 --- a/third_party/aom/av1/encoder/tokenize.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_ENCODER_TOKENIZE_H_ -#define AOM_AV1_ENCODER_TOKENIZE_H_ - -#include "av1/common/entropy.h" -#include "av1/encoder/block.h" -#include "aom_dsp/bitwriter.h" - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - aom_cdf_prob *color_map_cdf; - // TODO(yaowu: use packed enum type if appropriate) - uint8_t token; -} TOKENEXTRA; - -struct AV1_COMP; -struct ThreadData; -struct FRAME_COUNTS; - -struct tokenize_b_args { - const struct AV1_COMP *cpi; - struct ThreadData *td; - TOKENEXTRA **tp; - int this_rate; - uint8_t allow_update_cdf; -}; - -typedef enum { - OUTPUT_ENABLED = 0, - DRY_RUN_NORMAL, - DRY_RUN_COSTCOEFFS, -} RUN_TYPE; - -// Note in all the tokenize functions rate if non NULL is incremented -// with the coefficient token cost only if dry_run = DRY_RUN_COSTCOEFS, -// otherwise rate is not incremented. -void av1_tokenize_sb_vartx(const struct AV1_COMP *cpi, struct ThreadData *td, - TOKENEXTRA **t, RUN_TYPE dry_run, int mi_row, - int mi_col, BLOCK_SIZE bsize, int *rate, - uint8_t allow_update_cdf); - -int av1_cost_color_map(const MACROBLOCK *const x, int plane, BLOCK_SIZE bsize, - TX_SIZE tx_size, COLOR_MAP_TYPE type); - -void av1_tokenize_color_map(const MACROBLOCK *const x, int plane, - TOKENEXTRA **t, BLOCK_SIZE bsize, TX_SIZE tx_size, - COLOR_MAP_TYPE type, int allow_update_cdf, - struct FRAME_COUNTS *counts); - -static INLINE int av1_get_tx_eob(const struct segmentation *seg, int segment_id, - TX_SIZE tx_size) { - const int eob_max = av1_get_max_eob(tx_size); - return segfeature_active(seg, segment_id, SEG_LVL_SKIP) ? 
0 : eob_max; -} - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_TOKENIZE_H_ diff --git a/third_party/aom/av1/encoder/tx_prune_model_weights.h b/third_party/aom/av1/encoder/tx_prune_model_weights.h deleted file mode 100644 index 405bc9e6e..000000000 --- a/third_party/aom/av1/encoder/tx_prune_model_weights.h +++ /dev/null @@ -1,1944 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_ENCODER_TX_PRUNE_MODEL_WEIGHTS_H_ -#define AOM_AV1_ENCODER_TX_PRUNE_MODEL_WEIGHTS_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "av1/encoder/ml.h" - -// Tx type model for 4x4 block. 
-static const float av1_tx_type_nn_weights_4x4_hor_layer0[32] = { - -1.64947f, -1.54497f, -1.62832f, -0.17774f, -2.89498f, -0.72498f, 0.72036f, - 0.17996f, 1.20000f, -0.27654f, 0.77396f, 1.21684f, -1.75909f, -0.51272f, - -1.25923f, 0.35005f, -0.04257f, -0.23389f, -0.41841f, -0.08229f, 0.09503f, - 2.73144f, -0.16875f, -0.23482f, 0.02194f, -0.26427f, 0.28049f, 0.21260f, - 1.35792f, 0.27733f, 0.88660f, -0.68304f, -}; - -static const float av1_tx_type_nn_bias_4x4_hor_layer0[8] = { - 1.38742f, 0.59540f, -1.37622f, 1.92114f, - 0.00000f, -0.38998f, -0.32726f, -0.15650f, -}; - -static const float av1_tx_type_nn_weights_4x4_hor_layer1[32] = { - 1.65254f, 1.00915f, -0.89318f, -2.05142f, -0.23235f, 0.96781f, -0.37145f, - -0.21056f, 1.13891f, 0.38675f, 0.87739f, -1.42697f, 0.48015f, 0.61883f, - -0.03979f, 0.11487f, 0.48042f, 0.45200f, -0.23242f, 0.75166f, 0.55458f, - 0.39452f, -0.35285f, 1.59120f, -1.49221f, -0.48349f, -0.64692f, 1.49297f, - -0.26782f, -0.65416f, -0.10648f, 0.05568f, -}; - -static const float av1_tx_type_nn_bias_4x4_hor_layer1[4] = { - 4.07177f, - 3.26961f, - 0.58083f, - 1.21199f, -}; - -static const NN_CONFIG av1_tx_type_nnconfig_4x4_hor = { - 4, // num_inputs - 4, // num_outputs - 1, // num_hidden_layers - { - 8, - }, // num_hidden_nodes - { av1_tx_type_nn_weights_4x4_hor_layer0, - av1_tx_type_nn_weights_4x4_hor_layer1 }, - { av1_tx_type_nn_bias_4x4_hor_layer0, av1_tx_type_nn_bias_4x4_hor_layer1 } -}; - -static const float av1_tx_type_nn_weights_4x4_ver_layer0[32] = { - -0.02032f, 2.61610f, 0.02098f, -0.30217f, 0.12637f, 0.11017f, -3.01996f, - 0.35144f, 1.93776f, -0.20463f, 1.64102f, -1.41986f, -3.66717f, -0.51655f, - 0.43910f, 0.37778f, -1.02634f, 0.85337f, -0.69753f, 1.00206f, 2.11784f, - 1.89427f, 1.92919f, 0.43201f, -1.67358f, -1.67035f, -1.54623f, 0.16714f, - -0.06589f, -0.28142f, -0.33118f, 1.72227f, -}; - -static const float av1_tx_type_nn_bias_4x4_ver_layer0[8] = { - -0.33685f, 0.22025f, 0.28140f, 0.56138f, - 0.93489f, -1.77048f, 1.34989f, -0.93747f, 
-}; - -static const float av1_tx_type_nn_weights_4x4_ver_layer1[32] = { - -1.39506f, -1.06271f, -1.10886f, -1.69719f, 0.19699f, -2.39850f, -1.26457f, - 0.75328f, -1.26005f, -0.82738f, -0.12015f, -1.02702f, 1.40828f, -2.37739f, - -0.65639f, -0.71992f, -0.90453f, -1.12510f, -2.41362f, -1.16061f, -1.85577f, - -0.99165f, -1.91366f, 0.16785f, 0.34776f, 0.58154f, -0.18217f, -0.29257f, - -0.86315f, -0.53336f, 0.30320f, -1.32331f, -}; - -static const float av1_tx_type_nn_bias_4x4_ver_layer1[4] = { - -1.31519f, - -3.26321f, - 1.71794f, - -1.90778f, -}; - -static const NN_CONFIG av1_tx_type_nnconfig_4x4_ver = { - 4, // num_inputs - 4, // num_outputs - 1, // num_hidden_layers - { - 8, - }, // num_hidden_nodes - { av1_tx_type_nn_weights_4x4_ver_layer0, - av1_tx_type_nn_weights_4x4_ver_layer1 }, - { av1_tx_type_nn_bias_4x4_ver_layer0, av1_tx_type_nn_bias_4x4_ver_layer1 } -}; -/******************************************************************************/ - -// Tx type model for 4x8 block. -static const float av1_tx_type_nn_weights_4x8_hor_layer0[32] = { - 0.00218f, -0.41880f, -0.61215f, -0.92588f, 0.54291f, -0.10898f, 0.70691f, - 0.46819f, -1.61598f, -0.08834f, -0.96839f, 1.18489f, -0.45171f, -0.65445f, - -0.32179f, -0.10399f, 1.04379f, 0.91895f, 0.85589f, 0.08267f, 1.35388f, - -2.03096f, 0.08168f, -0.06372f, -0.26732f, -0.48262f, -0.08682f, 2.44071f, - -1.35896f, -1.17121f, 1.68866f, 0.10357f, -}; - -static const float av1_tx_type_nn_bias_4x8_hor_layer0[8] = { - 2.93391f, 0.66831f, -0.21419f, 0.00000f, - -0.72878f, 0.15127f, -1.46755f, 0.16658f, -}; - -static const float av1_tx_type_nn_weights_4x8_hor_layer1[32] = { - -1.52077f, -1.06243f, 0.35319f, -0.49207f, 0.54524f, 0.44271f, 1.37117f, - -0.38957f, -1.28889f, -0.57133f, 0.04658f, 0.62278f, 0.37984f, 0.33247f, - 1.65547f, -0.56806f, -1.38645f, -0.76258f, 0.67926f, 0.08783f, -0.01443f, - 0.34950f, 1.45812f, -0.51332f, -1.41331f, -0.16453f, 0.05755f, 0.31405f, - -0.50191f, 0.18219f, 1.83664f, -0.75276f, -}; - -static const 
float av1_tx_type_nn_bias_4x8_hor_layer1[4] = { - -1.17455f, - -2.26089f, - -1.79863f, - -2.26333f, -}; - -static const NN_CONFIG av1_tx_type_nnconfig_4x8_hor = { - 4, // num_inputs - 4, // num_outputs - 1, // num_hidden_layers - { - 8, - }, // num_hidden_nodes - { av1_tx_type_nn_weights_4x8_hor_layer0, - av1_tx_type_nn_weights_4x8_hor_layer1 }, - { av1_tx_type_nn_bias_4x8_hor_layer0, av1_tx_type_nn_bias_4x8_hor_layer1 } -}; - -static const float av1_tx_type_nn_weights_4x8_ver_layer0[128] = { - -0.00952f, -0.98858f, -0.93181f, 1.39594f, 0.96559f, 0.18162f, -0.76064f, - -0.06066f, 0.07907f, -0.09365f, -0.21313f, -0.02187f, -2.61707f, -2.68702f, - -0.10982f, 0.18559f, 1.17049f, 1.11387f, 1.12697f, 1.05804f, 1.12764f, - 1.06318f, 1.12052f, 0.17406f, 1.83157f, 0.19362f, 0.46910f, 0.39608f, - 0.33342f, 0.40083f, 0.27645f, 1.06864f, -4.06645f, -0.38775f, -0.11070f, - 0.03781f, -0.09141f, 0.06185f, -0.04852f, 0.20163f, 0.16784f, 0.16641f, - -0.50941f, -0.61087f, 2.07008f, -0.82381f, -0.85558f, 0.05528f, -0.10535f, - -2.81150f, 0.67038f, 0.43643f, 0.49062f, -0.04465f, 0.90438f, 0.00977f, - 0.46272f, 1.59751f, 0.95234f, 0.35086f, 0.85624f, 0.73149f, 1.67779f, - -2.21511f, -1.24746f, -1.09014f, -0.92441f, -1.22591f, -1.06961f, -0.95897f, - -1.24956f, 0.73797f, 1.23275f, -0.60064f, -0.07851f, 0.14397f, 0.22110f, - -0.04422f, 0.14350f, 0.75926f, 0.35032f, 0.48104f, 2.81408f, 0.34662f, - 0.42090f, 0.35521f, -1.36804f, -0.14974f, -0.47696f, -0.07892f, 0.36910f, - 0.32299f, 0.23916f, 0.06032f, -0.17844f, -0.17558f, -1.42746f, -0.55828f, - -1.00418f, -0.64823f, -0.73654f, -0.85197f, -1.50989f, 1.69385f, -0.04973f, - -0.09273f, 1.04249f, 0.79235f, 1.13229f, 0.99617f, 0.03851f, 0.56334f, - 0.90795f, 1.08296f, 0.58519f, 1.74765f, 0.63971f, 1.35951f, 0.07803f, - -0.05127f, 0.26514f, -0.84629f, -0.66343f, -2.10630f, 0.11017f, 2.18528f, - -0.21958f, 0.05970f, -}; - -static const float av1_tx_type_nn_bias_4x8_ver_layer0[16] = { - 0.04205f, 0.22260f, -1.03870f, -1.19568f, 0.44283f, 
0.01143f, - 0.00235f, 4.26772f, 0.44364f, -0.33199f, -0.39076f, -0.35129f, - 0.08288f, 0.18195f, -0.79890f, 0.10047f, -}; - -static const float av1_tx_type_nn_weights_4x8_ver_layer1[64] = { - -0.38193f, -0.12095f, 1.57802f, 0.34932f, -0.47333f, -0.12304f, -0.01736f, - -2.52445f, 0.18983f, -0.64707f, -0.60889f, -0.53750f, 0.91666f, -0.62823f, - -0.13377f, -0.43594f, -0.38618f, -0.01328f, 0.97457f, 1.48589f, -1.03238f, - -0.33459f, -0.35108f, -2.42417f, 0.60229f, 0.06824f, -0.75495f, 0.26902f, - 0.65311f, -0.23887f, -0.44604f, -0.55800f, -0.33842f, 0.04259f, -0.59589f, - 0.49738f, -0.62301f, -0.30896f, -0.29602f, -2.57052f, 2.00943f, -0.66490f, - -0.76312f, 0.28256f, 1.06311f, -0.38364f, -0.63508f, -0.57609f, -0.88765f, - -1.04403f, -0.46531f, 0.34084f, -1.20498f, -0.68352f, -0.72251f, -2.63242f, - -0.68736f, -0.37904f, -1.32371f, 0.47288f, 1.51904f, 0.78372f, -1.01830f, - -1.01848f, -}; - -static const float av1_tx_type_nn_bias_4x8_ver_layer1[4] = { - -1.45955f, - -2.08949f, - -1.24813f, - -1.55368f, -}; - -static const NN_CONFIG av1_tx_type_nnconfig_4x8_ver = { - 8, // num_inputs - 4, // num_outputs - 1, // num_hidden_layers - { - 16, - }, // num_hidden_nodes - { av1_tx_type_nn_weights_4x8_ver_layer0, - av1_tx_type_nn_weights_4x8_ver_layer1 }, - { av1_tx_type_nn_bias_4x8_ver_layer0, av1_tx_type_nn_bias_4x8_ver_layer1 } -}; -/******************************************************************************/ - -// Tx type model for 8x4 block. 
-static const float av1_tx_type_nn_weights_8x4_hor_layer0[128] = { - -0.22492f, 0.13341f, -4.03243f, -0.64015f, 0.02783f, 0.60466f, -0.13335f, - 0.16828f, 0.12336f, 0.52904f, 1.18455f, -0.32425f, 0.13052f, 0.93810f, - -3.71165f, 0.02990f, -4.63558f, 0.05666f, 0.03524f, -0.07449f, -0.44006f, - -0.33215f, -0.33713f, 0.08097f, 0.60873f, 0.29582f, 0.21696f, -0.78729f, - -0.16757f, -0.26567f, -0.00720f, -1.11226f, 1.58189f, 1.58463f, 1.48536f, - 1.54374f, 1.60069f, 1.46125f, 1.53932f, 0.05974f, -1.82192f, 0.47043f, - 0.38090f, 0.20833f, -0.05637f, 0.05183f, 0.01323f, -0.25662f, 0.78634f, - -0.55069f, -0.02975f, -1.29294f, -0.77192f, -2.34299f, -1.28074f, 0.77894f, - -1.69740f, -1.66032f, -1.44323f, -1.55063f, -1.50845f, -1.23690f, -1.80663f, - 0.75079f, 2.32551f, 0.05878f, 0.80438f, 0.88584f, 0.69153f, 0.89060f, - 0.73660f, 0.87259f, -0.00745f, -1.30044f, -0.59430f, 2.07270f, 1.03307f, - -0.84697f, -1.19393f, 0.17549f, -0.24978f, -3.67234f, 0.20781f, -0.53946f, - -0.05068f, 0.88274f, 1.30371f, 0.10288f, 0.07585f, 0.12259f, -0.30815f, - 0.25437f, -2.82096f, -2.69482f, 0.02370f, 0.12500f, -0.21019f, -0.49220f, - 0.03638f, -0.29795f, 0.28645f, -0.48432f, -0.38584f, -0.32148f, -0.47197f, - 0.32437f, 0.32528f, -0.19437f, 0.30383f, -0.31879f, 0.26359f, -0.12164f, - -0.43647f, -0.08288f, -0.33438f, -0.63608f, -0.46647f, -0.46574f, 0.47806f, - -0.49012f, -1.51234f, -1.13502f, -1.20470f, -1.02913f, -1.09182f, -0.93921f, - -1.85523f, 0.92532f, -}; - -static const float av1_tx_type_nn_bias_8x4_hor_layer0[16] = { - 0.36631f, 0.02901f, 0.64305f, 1.53074f, -1.40229f, 0.03852f, - -0.05043f, 0.89632f, -1.23312f, 0.07036f, 0.17070f, 0.56250f, - -0.28958f, -0.32869f, -0.01704f, 0.68171f, -}; - -static const float av1_tx_type_nn_weights_8x4_hor_layer1[64] = { - -0.49441f, -0.31960f, -0.84946f, -0.85800f, -2.37767f, 0.81373f, -0.73172f, - -0.69337f, 0.88807f, -0.49242f, -0.44717f, -0.11436f, 0.09978f, 0.15393f, - 0.17083f, 1.44850f, -0.20582f, -0.04906f, 0.42990f, -0.61939f, -1.09692f, - 
-1.14885f, -1.36879f, -1.30828f, -0.59558f, -0.30903f, -0.08906f, 0.06953f, - 0.15383f, -0.04193f, -0.54858f, 1.82676f, -0.22411f, 0.05264f, -0.45848f, - -0.72985f, 0.87553f, 0.04116f, -1.29774f, -2.63018f, 1.09089f, -0.36048f, - -0.16725f, 0.11627f, 0.49918f, 0.07539f, 0.00763f, 0.73706f, 0.87800f, - 0.57049f, 0.60969f, 1.02779f, 1.53339f, -0.35915f, 0.06410f, 1.44582f, - 0.09698f, 0.71888f, 0.60594f, 0.84103f, -0.50440f, -0.38825f, 0.15626f, - -1.10654f, -}; - -static const float av1_tx_type_nn_bias_8x4_hor_layer1[4] = { - -0.92861f, - -1.45151f, - -1.33588f, - -4.33853f, -}; - -static const NN_CONFIG av1_tx_type_nnconfig_8x4_hor = { - 8, // num_inputs - 4, // num_outputs - 1, // num_hidden_layers - { - 16, - }, // num_hidden_nodes - { av1_tx_type_nn_weights_8x4_hor_layer0, - av1_tx_type_nn_weights_8x4_hor_layer1 }, - { av1_tx_type_nn_bias_8x4_hor_layer0, av1_tx_type_nn_bias_8x4_hor_layer1 } -}; - -static const float av1_tx_type_nn_weights_8x4_ver_layer0[32] = { - -1.10946f, 1.86574f, -1.59343f, 0.27018f, -1.70676f, -0.73982f, -0.19021f, - -1.94208f, -2.29759f, -1.44402f, 0.28700f, -1.18340f, -1.50158f, -0.44175f, - -1.36831f, 1.00374f, 2.59312f, 0.50291f, -0.71042f, -0.12238f, -0.15901f, - -0.22807f, -0.67376f, -0.30215f, 0.54407f, -0.45538f, 1.18262f, 2.28687f, - 1.66212f, 1.70826f, 1.55182f, 0.12230f, -}; - -static const float av1_tx_type_nn_bias_8x4_ver_layer0[8] = { - 0.10943f, 2.09789f, 2.16578f, 0.15766f, - -0.42461f, 0.00000f, 1.22090f, -1.28717f, -}; - -static const float av1_tx_type_nn_weights_8x4_ver_layer1[32] = { - 1.20426f, -1.23237f, 2.41053f, -0.72488f, 1.25249f, 0.18018f, -0.09586f, - 2.17901f, 0.15364f, 1.21535f, -0.38263f, -0.74309f, 0.50551f, -0.54208f, - 0.59139f, 1.16095f, 0.55919f, -0.60183f, 1.18949f, 1.60787f, 0.54002f, - -0.10712f, -0.16153f, 0.16207f, -0.32338f, 2.68712f, -2.83483f, -0.27086f, - -1.15005f, -0.39311f, 1.51236f, -1.68973f, -}; - -static const float av1_tx_type_nn_bias_8x4_ver_layer1[4] = { - 1.81013f, - 1.10517f, - 
2.90059f, - 0.95391f, -}; - -static const NN_CONFIG av1_tx_type_nnconfig_8x4_ver = { - 4, // num_inputs - 4, // num_outputs - 1, // num_hidden_layers - { - 8, - }, // num_hidden_nodes - { av1_tx_type_nn_weights_8x4_ver_layer0, - av1_tx_type_nn_weights_8x4_ver_layer1 }, - { av1_tx_type_nn_bias_8x4_ver_layer0, av1_tx_type_nn_bias_8x4_ver_layer1 } -}; -/******************************************************************************/ - -// Tx type model for 8x8 block. -static const float av1_tx_type_nn_weights_8x8_hor_layer0[128] = { - -0.85529f, 0.37619f, 0.12754f, 0.08622f, 0.45278f, 0.54929f, 1.60651f, - -0.62654f, -0.54929f, -0.10131f, -0.17569f, 0.13948f, 0.31695f, -0.05616f, - 0.20483f, -0.36448f, 2.27203f, -0.33087f, 0.47679f, 0.86888f, 0.39370f, - 0.46239f, 0.01113f, 1.50327f, -1.48226f, -1.69621f, -1.49777f, -1.38885f, - -1.37753f, -1.22681f, -1.70576f, 0.51329f, -1.65662f, 1.74197f, -0.13579f, - -0.13133f, -0.58396f, -0.55510f, -1.10709f, -2.34975f, 0.22445f, -0.56491f, - -0.83432f, 0.13492f, 1.32147f, 2.85285f, 0.13819f, 0.03792f, -1.30792f, - 0.04155f, -0.70644f, -0.43430f, -0.16212f, -0.86945f, -1.16976f, 1.68339f, - 0.29540f, 0.01137f, -0.25335f, -0.16856f, 0.12028f, 0.05207f, 0.39357f, - -0.01545f, -0.21980f, -1.94091f, -1.01315f, -0.68270f, -0.40590f, -0.67111f, - 2.08283f, 0.19291f, -4.81426f, -0.65044f, -0.24598f, 0.06371f, -0.10272f, - -0.14502f, -0.06821f, 0.45202f, 0.21091f, -0.80864f, 0.39255f, 1.79189f, - 1.80453f, 1.10484f, 1.17608f, 0.96901f, -0.35871f, -0.94311f, 0.63147f, - 2.95157f, 0.45917f, -0.42849f, -0.55643f, -0.06097f, 3.49299f, -0.50972f, - 0.11075f, -0.08405f, -0.09274f, -0.22694f, -0.42426f, 0.48632f, -1.61074f, - 1.82998f, 0.37623f, -1.20330f, -0.01142f, -1.33307f, -0.27492f, -2.23621f, - 1.38846f, 1.42085f, 1.42568f, 1.36152f, 1.46910f, 1.27473f, 1.34752f, - 0.12753f, -1.08197f, -1.08280f, -0.79489f, -1.12338f, -1.06795f, -0.87857f, - -0.99892f, 1.09823f, -}; - -static const float av1_tx_type_nn_bias_8x8_hor_layer0[16] = { - 
-0.49232f, -0.29685f, -1.44020f, 1.10940f, 1.16452f, -0.34862f, - -0.38761f, -0.36243f, 0.21776f, 0.28234f, 2.34269f, -0.04104f, - -0.26319f, 2.65579f, -1.30137f, -0.01487f, -}; - -static const float av1_tx_type_nn_weights_8x8_hor_layer1[64] = { - -0.38058f, -0.41295f, -1.26884f, -0.75560f, -1.57450f, 0.56072f, -1.42322f, - -0.29106f, 0.07228f, 0.04391f, 1.61388f, -0.03055f, 0.81637f, 2.06045f, - 0.27119f, -0.48328f, -0.45528f, -0.60534f, -1.61209f, -0.78157f, -1.65034f, - 0.60958f, -1.30523f, 0.25143f, 0.11398f, 0.37860f, 1.54829f, 0.02309f, - 0.67288f, 2.11447f, 0.44845f, -0.70406f, -0.67897f, -0.38759f, -1.30383f, - -1.22646f, -1.54571f, 0.60552f, -1.52565f, 0.11469f, 0.17344f, 0.08622f, - 1.57906f, -0.00909f, 0.81634f, 2.04909f, 1.26466f, -1.45741f, -0.75229f, - 0.06200f, -1.05835f, -0.66257f, -1.73766f, 0.99923f, -1.87082f, 0.14580f, - 0.49525f, 0.46839f, 1.32203f, 0.33923f, 0.97001f, 2.38584f, 1.58811f, - 0.06161f, -}; - -static const float av1_tx_type_nn_bias_8x8_hor_layer1[4] = { - 1.70385f, - 1.82373f, - 1.78496f, - 1.80826f, -}; - -static const NN_CONFIG av1_tx_type_nnconfig_8x8_hor = { - 8, // num_inputs - 4, // num_outputs - 1, // num_hidden_layers - { - 16, - }, // num_hidden_nodes - { av1_tx_type_nn_weights_8x8_hor_layer0, - av1_tx_type_nn_weights_8x8_hor_layer1 }, - { av1_tx_type_nn_bias_8x8_hor_layer0, av1_tx_type_nn_bias_8x8_hor_layer1 } -}; - -static const float av1_tx_type_nn_weights_8x8_ver_layer0[128] = { - -0.67016f, -1.72366f, -1.86576f, -1.50962f, -1.70419f, -1.73964f, -1.84615f, - 2.09681f, -0.05081f, -0.61030f, 2.02541f, 0.60222f, 0.99936f, 2.02114f, - -0.53893f, -0.23757f, 0.73566f, 0.25443f, 0.00132f, -0.74036f, -0.75351f, - -0.76964f, -1.71007f, -0.15770f, 1.60982f, 2.17638f, 0.90681f, 0.64973f, - 0.85914f, 0.58786f, -1.46228f, 0.05187f, 1.18804f, 0.30850f, 0.29512f, - 0.40526f, 0.37635f, 0.32311f, 0.37471f, 1.12346f, 3.41856f, -0.36653f, - 0.42537f, -0.19240f, 0.00155f, 0.30826f, -0.02116f, -0.53435f, -0.34829f, - -0.52466f, -0.11521f, 
-0.29163f, -2.05689f, -2.87372f, -0.62626f, 0.09585f, - -0.75257f, 0.10057f, 1.43474f, 0.89450f, 0.75900f, 1.11147f, 1.00558f, - 0.25886f, 2.22095f, -0.17926f, 0.57161f, 0.39546f, 0.47846f, 0.40452f, - 0.54298f, 0.45814f, -3.62788f, -3.02374f, 0.03716f, -0.13937f, -0.09415f, - -0.12463f, 0.05682f, 0.03672f, 1.20746f, 1.25003f, 1.27071f, 1.31883f, - 1.27473f, 1.34943f, 1.23158f, 0.09039f, 0.19388f, 0.63420f, 2.79612f, - 0.93803f, -0.11323f, -0.02027f, 0.41286f, -0.05979f, -3.80705f, -0.52451f, - -0.77098f, -0.68132f, -0.65559f, -0.60975f, -1.26165f, 0.25582f, 0.05346f, - 0.61403f, 0.32140f, -2.39831f, -1.42355f, 1.30541f, 1.02361f, 0.12930f, - -1.61469f, -0.77036f, -0.59144f, 1.27769f, 1.52068f, 0.82137f, 1.83159f, - -0.66626f, -0.69806f, -1.00564f, -0.85995f, -0.90889f, -0.84412f, -0.85712f, - -1.29848f, 0.39308f, -}; - -static const float av1_tx_type_nn_bias_8x8_ver_layer0[16] = { - -0.14868f, -0.48343f, 3.94416f, -0.78037f, -1.33789f, -0.60611f, - 0.51793f, 0.44030f, -0.71563f, 0.22561f, -1.19083f, -0.46149f, - 0.83015f, 0.06024f, 1.17180f, 0.65122f, -}; - -static const float av1_tx_type_nn_weights_8x8_ver_layer1[64] = { - -1.42711f, -0.21683f, 2.12061f, 0.20489f, -0.50228f, -0.24770f, 0.23391f, - 1.03470f, -0.44847f, -0.63225f, -0.21583f, -0.06467f, -0.21892f, -0.07786f, - 1.43322f, 0.00280f, -1.53057f, -0.18912f, 1.95333f, 0.31151f, -2.07601f, - 0.06776f, 0.25529f, 0.94800f, -1.11453f, -0.20594f, -0.13281f, 0.01485f, - 0.17650f, -0.07955f, 1.43734f, -0.23193f, -2.06463f, -0.21238f, 2.13707f, - 0.30351f, 0.27594f, -0.36245f, 0.19539f, 0.91045f, -0.24068f, -0.37616f, - 0.88792f, 0.02947f, -0.16903f, -0.04932f, 1.51293f, -0.95967f, -1.62903f, - 0.05326f, 2.30703f, 0.64445f, -1.09464f, -0.16623f, 1.00240f, 0.07548f, - -0.50406f, 0.63854f, 1.02340f, 0.49833f, 0.13671f, 0.26722f, 2.09516f, - -0.41305f, -}; - -static const float av1_tx_type_nn_bias_8x8_ver_layer1[4] = { - 2.14067f, - 2.76699f, - 2.04233f, - 1.34803f, -}; - -static const NN_CONFIG 
av1_tx_type_nnconfig_8x8_ver = { - 8, // num_inputs - 4, // num_outputs - 1, // num_hidden_layers - { - 16, - }, // num_hidden_nodes - { av1_tx_type_nn_weights_8x8_ver_layer0, - av1_tx_type_nn_weights_8x8_ver_layer1 }, - { av1_tx_type_nn_bias_8x8_ver_layer0, av1_tx_type_nn_bias_8x8_ver_layer1 } -}; -/******************************************************************************/ - -// Tx type model for 8x16 block. -static const float av1_tx_type_nn_weights_8x16_hor_layer0[128] = { - -1.61872f, -1.58520f, -1.41236f, -1.53255f, -1.59794f, -1.25769f, -1.90043f, - 0.73431f, 1.10135f, 0.47054f, 0.43230f, -0.43009f, -0.09135f, -0.07289f, - -0.38785f, 1.23775f, -0.35312f, 0.73789f, 0.88864f, 0.75957f, 0.62579f, - 0.46974f, 0.21851f, 1.63821f, -2.27289f, -0.68522f, -0.69814f, -0.84368f, - -0.91320f, -0.63055f, -1.03296f, 0.55778f, -0.00071f, 1.27539f, 1.60068f, - 1.40975f, 0.97372f, 0.92843f, 1.90853f, 0.12626f, 1.71953f, 1.41978f, - -0.12234f, -1.27058f, 0.76207f, 0.02495f, -0.67038f, -0.05255f, 1.72923f, - 1.47630f, 1.47058f, 1.47614f, 1.49354f, 1.66131f, 1.50801f, 0.17145f, - -2.30947f, -2.10850f, -1.25636f, -0.24900f, 0.72602f, 1.26572f, 0.97865f, - -0.65466f, 1.31129f, 0.26916f, 0.12139f, -0.12761f, -0.39143f, -0.28134f, - 0.06584f, 2.24418f, 0.22516f, 0.05011f, -0.01671f, -0.29476f, -0.40326f, - 0.21138f, -0.11573f, -0.31154f, -0.36828f, 0.03694f, -0.07172f, -0.63419f, - -3.14351f, -1.23125f, 0.65311f, -0.11406f, 1.97287f, -0.10422f, 0.83896f, - 0.85033f, 0.49724f, 0.80482f, 0.51454f, 1.06447f, 0.76693f, 0.72599f, - -0.78573f, -0.53950f, 0.40894f, 0.00086f, 0.10784f, -0.70498f, 1.16395f, - 1.14597f, 1.13496f, 1.12177f, 1.02100f, -1.37574f, -2.97144f, 0.33899f, - 0.42013f, 0.86327f, 2.31983f, 2.04008f, 0.95503f, 0.15081f, 0.11530f, - -0.02574f, -4.77119f, 0.13257f, -0.01704f, -0.23087f, -0.00825f, 0.07029f, - -0.28136f, 0.42556f, -}; - -static const float av1_tx_type_nn_bias_8x16_hor_layer0[16] = { - 0.93617f, -0.24000f, -1.26821f, 0.78780f, 0.13690f, -0.21948f, - 
-1.45162f, 0.44584f, -1.92582f, -0.23169f, 0.56004f, -1.19937f, - 1.81560f, -1.02643f, -0.81690f, 0.08302f, -}; - -static const float av1_tx_type_nn_weights_8x16_hor_layer1[64] = { - 0.06696f, -0.11538f, -1.42029f, 0.32965f, 0.81046f, 0.01146f, 1.20945f, - -0.16899f, 0.53224f, -0.40232f, 0.01786f, -0.73242f, 1.29750f, 1.95185f, - 0.70143f, 1.43287f, 0.76220f, 0.79937f, -1.79011f, -1.15178f, 0.42526f, - -0.67519f, 0.77267f, -0.30697f, 2.46004f, -0.49828f, 0.02875f, 1.09972f, - 1.47662f, 0.61719f, 0.61417f, -0.12363f, 2.53048f, 0.00418f, -1.38964f, - 0.88117f, 0.39239f, -0.19347f, -2.58600f, -0.33715f, 1.09323f, -0.32127f, - 0.02456f, -0.19125f, 1.12728f, 0.66502f, 0.34296f, 1.14897f, 0.29967f, - 1.19209f, 0.22108f, -0.11975f, 1.49776f, -1.34624f, -2.58478f, -1.34632f, - 1.53207f, 0.45634f, -1.48476f, 0.17489f, 0.71790f, -2.12086f, -1.21778f, - -1.31243f, -}; - -static const float av1_tx_type_nn_bias_8x16_hor_layer1[4] = { - 0.83359f, - 1.06875f, - 1.77645f, - 1.49570f, -}; - -static const NN_CONFIG av1_tx_type_nnconfig_8x16_hor = { - 8, // num_inputs - 4, // num_outputs - 1, // num_hidden_layers - { - 16, - }, // num_hidden_nodes - { av1_tx_type_nn_weights_8x16_hor_layer0, - av1_tx_type_nn_weights_8x16_hor_layer1 }, - { av1_tx_type_nn_bias_8x16_hor_layer0, av1_tx_type_nn_bias_8x16_hor_layer1 } -}; - -static const float av1_tx_type_nn_weights_8x16_ver_layer0[128] = { - 0.32858f, -1.28887f, 0.25632f, -0.05262f, 2.69203f, -0.07004f, 1.37337f, - -0.05725f, -0.05659f, 0.05592f, 0.01039f, -0.29343f, 1.58628f, -0.30003f, - -3.43118f, 0.00272f, 1.70928f, -0.76348f, 0.05889f, -0.03263f, -0.07724f, - 0.03523f, -0.19890f, 1.18005f, -0.03605f, -0.20530f, -4.00733f, 0.10210f, - -0.05368f, -0.17650f, -0.15317f, 0.06499f, 0.56705f, 1.04341f, 0.62890f, - 0.73451f, -0.22199f, 0.86659f, 0.78443f, -0.61664f, -0.50606f, 0.30247f, - 0.14455f, 0.39276f, 0.49203f, 0.65019f, 0.12269f, 1.64080f, 1.68289f, - 1.42694f, 1.60825f, 1.58501f, 1.47252f, 1.62589f, 1.48218f, 0.17726f, - -0.04884f, 
0.35376f, -0.04796f, 0.32589f, 0.35087f, 0.35258f, -0.46103f, - -0.31176f, -0.05203f, 0.07247f, -0.26756f, 0.22019f, 0.03412f, 0.33773f, - 0.29811f, -0.11140f, 0.12831f, -0.44673f, -0.09858f, 0.07889f, 0.15137f, - 0.00347f, -0.23394f, 0.08886f, -0.31201f, -0.79912f, -0.51092f, 0.14123f, - -1.09599f, -4.26020f, -0.68675f, -0.02842f, -1.54538f, -1.28977f, -1.30558f, - -1.21074f, -1.37142f, -1.14743f, -1.85397f, 0.82985f, -0.30681f, 0.04494f, - -0.24023f, -4.18053f, -0.16096f, -0.55492f, -0.27882f, 0.05829f, -0.41224f, - -2.52088f, -0.56162f, -1.04547f, -1.70685f, -0.28842f, -1.43673f, -0.01468f, - -3.20585f, -0.69120f, -0.43931f, -0.46270f, -0.65885f, -0.55884f, -0.75138f, - 0.36381f, -5.70858f, -0.14548f, -0.15745f, -0.11812f, -0.07605f, -0.07693f, - -0.12236f, 0.16075f, -}; - -static const float av1_tx_type_nn_bias_8x16_ver_layer0[16] = { - -0.35385f, 0.30491f, -0.90011f, 0.42941f, 1.20928f, -0.88331f, - -1.48818f, -0.34785f, -0.32668f, -0.22695f, 0.89188f, 0.65521f, - 0.57598f, 0.99819f, 0.75175f, 0.17044f, -}; - -static const float av1_tx_type_nn_weights_8x16_ver_layer1[64] = { - -0.62913f, -0.34304f, 0.42963f, -0.17440f, -1.44092f, 0.69142f, -1.36067f, - 0.52211f, 0.44658f, -0.26501f, -0.41657f, 0.34428f, -0.34390f, -0.58567f, - -0.84097f, -1.96311f, -0.37215f, -0.22250f, -1.23811f, -0.07247f, -0.81731f, - 0.58755f, -1.30559f, 0.39551f, 0.41743f, -0.09940f, -0.33230f, 0.14458f, - -0.25139f, -0.54517f, 0.13469f, -0.38157f, -0.39109f, -0.18205f, 0.06834f, - -0.08395f, -0.92187f, 0.56724f, 1.44381f, 0.53226f, -0.22356f, 0.12285f, - -0.29418f, -1.86749f, -0.22372f, -0.60204f, -0.87746f, -1.16936f, 0.56884f, - 0.62641f, -0.11823f, 1.00395f, 1.64794f, -0.64535f, 2.29322f, -0.23397f, - 0.17251f, -0.35927f, 0.65631f, -0.26812f, 0.80128f, 0.85748f, 0.47404f, - 2.20547f, -}; - -static const float av1_tx_type_nn_bias_8x16_ver_layer1[4] = { - -0.44080f, - -1.67455f, - -1.46332f, - -6.13206f, -}; - -static const NN_CONFIG av1_tx_type_nnconfig_8x16_ver = { - 8, // num_inputs 
- 4, // num_outputs - 1, // num_hidden_layers - { - 16, - }, // num_hidden_nodes - { av1_tx_type_nn_weights_8x16_ver_layer0, - av1_tx_type_nn_weights_8x16_ver_layer1 }, - { av1_tx_type_nn_bias_8x16_ver_layer0, av1_tx_type_nn_bias_8x16_ver_layer1 } -}; -/******************************************************************************/ - -// Tx type model for 16x8 block. -static const float av1_tx_type_nn_weights_16x8_hor_layer0[128] = { - 0.02600f, 0.09786f, -1.05107f, -0.35594f, -0.15658f, 2.99828f, -0.07106f, - -0.10101f, -0.14412f, -0.83790f, -0.19434f, 2.28368f, 1.91727f, -0.00956f, - -0.90640f, 0.09174f, 1.58895f, 1.38945f, 1.49431f, 1.51381f, 1.44803f, - 1.53544f, 1.44694f, 0.17753f, 1.69735f, -0.78652f, 0.31092f, -0.23736f, - 0.02231f, -0.09884f, -0.00493f, 1.21189f, -1.94382f, -0.34629f, -0.58309f, - 0.72291f, -0.30056f, 0.90660f, -0.57495f, 3.07809f, 0.73644f, 1.43050f, - 1.34356f, -0.66554f, 0.50102f, -0.64305f, 0.42044f, -1.66165f, -0.05733f, - -2.51402f, -1.01067f, -0.33390f, -0.32986f, -0.92431f, 1.86281f, -0.07290f, - -0.26290f, -0.68941f, 1.81156f, 0.66125f, -2.09974f, 0.17032f, -0.67461f, - -0.00876f, -1.50154f, 1.17153f, 1.00377f, 0.33022f, 0.74689f, 0.42878f, - 0.61725f, -0.83967f, 0.09467f, -0.39892f, 0.33863f, 0.10656f, -0.09249f, - -0.39757f, 0.48481f, -0.35162f, 1.47014f, 1.67827f, -1.84051f, 0.16291f, - -0.50135f, -2.29911f, -0.42217f, -0.13358f, 1.45899f, -0.14743f, -0.02763f, - -0.28003f, -0.01364f, 0.21014f, -0.29026f, -0.20198f, 1.38782f, 0.56731f, - 0.27489f, 0.43227f, 0.41326f, 0.42721f, 0.87720f, -1.90067f, -5.04951f, - -0.17638f, -0.58119f, -0.08954f, -0.13692f, -0.12325f, -0.38548f, 0.66462f, - -1.42377f, -1.21917f, -1.38193f, -1.36539f, -1.39378f, -1.19629f, -1.59812f, - 0.28689f, 0.32394f, 0.52128f, 0.01013f, -0.28948f, -0.26293f, -0.44331f, - -0.36570f, -0.50757f, -}; - -static const float av1_tx_type_nn_bias_16x8_hor_layer0[16] = { - -0.08696f, -0.22110f, -1.43604f, -1.00451f, -1.51029f, 0.63736f, - 0.45260f, 0.16229f, 4.01393f, 
-0.21748f, 0.36411f, -0.08764f, - -0.12329f, 0.08986f, 1.08117f, -0.00220f, -}; - -static const float av1_tx_type_nn_weights_16x8_hor_layer1[64] = { - 0.55824f, -0.14648f, 0.81947f, -0.45867f, -1.86078f, -0.17291f, 0.34849f, - 0.15153f, 1.75625f, -0.25760f, 0.72015f, -0.30059f, -0.57975f, 0.07609f, - -0.02036f, 0.07912f, 0.57080f, -0.13792f, 0.74184f, -0.87669f, -1.87572f, - -0.27270f, 0.39751f, 0.19652f, 2.03514f, -0.32944f, 0.76251f, 0.04399f, - -0.63175f, 0.37420f, 0.08309f, 0.04466f, 0.60255f, -0.12820f, 1.66065f, - -0.59496f, -1.94794f, -0.14847f, 0.39424f, 0.16273f, 1.80587f, 0.41197f, - 0.74691f, -0.21217f, -0.63173f, 0.09510f, -0.35538f, -0.04407f, 0.92847f, - 0.20141f, 1.68680f, -0.56528f, -2.26960f, 0.12978f, 0.73748f, 0.42438f, - 2.00673f, -0.40189f, 0.95423f, 0.23234f, -0.80953f, 0.65814f, 0.49444f, - -0.23347f, -}; - -static const float av1_tx_type_nn_bias_16x8_hor_layer1[4] = { - 3.57175f, - 2.42612f, - 3.31259f, - 2.08287f, -}; - -static const NN_CONFIG av1_tx_type_nnconfig_16x8_hor = { - 8, // num_inputs - 4, // num_outputs - 1, // num_hidden_layers - { - 16, - }, // num_hidden_nodes - { av1_tx_type_nn_weights_16x8_hor_layer0, - av1_tx_type_nn_weights_16x8_hor_layer1 }, - { av1_tx_type_nn_bias_16x8_hor_layer0, av1_tx_type_nn_bias_16x8_hor_layer1 } -}; - -static const float av1_tx_type_nn_weights_16x8_ver_layer0[128] = { - 0.46633f, 1.55328f, -0.11230f, -0.29571f, 0.18814f, -1.52430f, -2.34660f, - 0.08644f, -1.97718f, -1.29140f, -1.12262f, -1.12985f, -1.25911f, -0.96506f, - -1.57129f, 0.96021f, 1.34192f, 1.28623f, 1.21655f, 1.28758f, 1.25482f, - 1.30195f, 1.19190f, 0.09310f, 0.52072f, 0.91487f, 1.24100f, 1.61236f, - 1.72166f, 2.20750f, 1.62379f, -1.43936f, 0.50665f, 0.40213f, 0.66502f, - -1.66699f, -3.07618f, 0.05877f, 0.60987f, -0.09995f, -0.10916f, 0.48049f, - 0.23812f, 0.39847f, -0.21682f, -0.63455f, 0.33453f, -0.67939f, -4.14355f, - -0.62756f, -0.22502f, -0.17215f, 0.01062f, 0.27049f, -0.10748f, 0.30945f, - 2.72445f, -0.89181f, -0.06800f, 
0.20595f, -0.73385f, 0.04071f, -1.30294f, - 1.83507f, 0.92570f, 0.69609f, 0.76285f, 0.69892f, 0.76409f, 0.63104f, - 0.73397f, 1.09575f, -0.20129f, -0.24022f, -0.24599f, -0.59107f, -0.88755f, - -0.68987f, -0.75495f, -1.31002f, -1.30237f, -0.94093f, -2.15678f, -1.49303f, - -1.17498f, -1.39952f, -0.91270f, -0.05587f, 1.02381f, -0.75580f, -0.65263f, - -0.78996f, -0.71075f, -0.71018f, -0.70350f, -1.26196f, 2.34208f, -0.53611f, - 0.19752f, -0.16842f, -0.24828f, 0.21857f, 0.08222f, -2.55894f, -1.75702f, - 0.11394f, 1.03083f, 0.79972f, -1.54112f, -1.82341f, -0.57597f, -0.02077f, - -0.39616f, -0.00995f, -0.12809f, 0.01188f, -0.25117f, 0.09202f, 0.09336f, - -0.05614f, -0.30039f, 0.25834f, 1.19944f, 1.22533f, 0.92330f, 0.75967f, - -0.81945f, -0.41647f, -}; - -static const float av1_tx_type_nn_bias_16x8_ver_layer0[16] = { - 0.17841f, 0.67315f, -1.24450f, 3.13859f, 0.16203f, -0.14992f, - 0.29553f, -1.15567f, -0.71421f, 1.15977f, 1.14585f, 3.02460f, - -0.04510f, 0.48000f, -0.09354f, -0.42422f, -}; - -static const float av1_tx_type_nn_weights_16x8_ver_layer1[64] = { - 0.29912f, -0.10009f, -1.11478f, 1.76812f, -0.27719f, 0.52148f, 0.17622f, - -1.17116f, 0.73397f, -0.69279f, -0.11080f, 1.53751f, -1.42003f, 0.14731f, - 0.13592f, -0.04883f, 0.39186f, -0.13655f, -0.43994f, 1.82759f, -0.25601f, - -0.15018f, 0.51920f, -1.56070f, 0.31683f, -0.79367f, -0.02904f, 1.28637f, - -1.15203f, 0.26627f, 0.42828f, -0.24258f, 0.38647f, -0.83352f, 0.32553f, - 2.09522f, -0.26822f, -0.42191f, 0.32825f, -1.30748f, 1.50551f, -0.52669f, - 0.20045f, 1.69318f, -1.47839f, 0.30802f, -0.07290f, -0.28106f, 0.68192f, - -0.15522f, 1.12579f, 2.21921f, 0.09720f, -0.50265f, 0.83165f, -1.31721f, - 0.72422f, -1.24952f, 0.61653f, 2.04117f, -1.42406f, 0.52568f, -0.46180f, - -0.00873f, -}; - -static const float av1_tx_type_nn_bias_16x8_ver_layer1[4] = { - 3.34981f, - 3.74710f, - 1.38339f, - 0.45176f, -}; - -static const NN_CONFIG av1_tx_type_nnconfig_16x8_ver = { - 8, // num_inputs - 4, // num_outputs - 1, // 
num_hidden_layers - { - 16, - }, // num_hidden_nodes - { av1_tx_type_nn_weights_16x8_ver_layer0, - av1_tx_type_nn_weights_16x8_ver_layer1 }, - { av1_tx_type_nn_bias_16x8_ver_layer0, av1_tx_type_nn_bias_16x8_ver_layer1 } -}; -/******************************************************************************/ - -// Tx type model for 16x16 block. -static const float av1_tx_type_nn_weights_16x16_layer0[128] = { - 1.26592f, 1.36313f, 1.30956f, 1.29926f, 1.48816f, 1.68851f, 1.32000f, - 0.13321f, -0.22477f, -0.88906f, -0.19622f, 1.69605f, 1.22180f, -1.57771f, - -1.15765f, 0.05710f, -1.13355f, -0.85486f, -0.99971f, -0.91571f, -1.06031f, - -0.77952f, -1.15723f, 1.17809f, 1.35602f, -0.05243f, -0.37596f, 0.26108f, - 0.17611f, -0.10323f, 0.77279f, -0.48911f, -0.79308f, 0.55112f, 0.43918f, - 0.27872f, 0.28714f, 0.45830f, 1.05689f, 0.03705f, -2.49975f, -0.01940f, - 0.05709f, 0.07942f, -0.13290f, -0.10359f, 0.00143f, 0.37303f, 0.96470f, - 0.53293f, 1.14459f, 0.89185f, 0.43378f, 0.47764f, 0.90924f, 0.15279f, - -0.15361f, 0.02949f, 0.42240f, 0.68143f, 0.89588f, 0.73754f, 0.10974f, - 1.57755f, -0.39870f, -0.32914f, 0.35638f, 0.34991f, -0.00003f, -0.23373f, - 0.29630f, -0.76699f, -0.01356f, 0.04234f, 0.84253f, 1.92078f, 0.93160f, - 0.71993f, 0.71604f, 0.76455f, -1.59782f, 0.32332f, 1.11628f, 0.33062f, - -0.03728f, -0.05710f, 0.80447f, -0.14719f, 1.34658f, -0.05718f, 0.64015f, - 0.21926f, 0.41653f, 0.12720f, 0.54092f, 1.39411f, 1.81819f, -0.24513f, - 0.00955f, 0.38011f, -0.57787f, -0.41759f, 0.68834f, -0.31783f, -0.40607f, - -0.10107f, -0.79374f, 0.75599f, -0.16282f, -0.14490f, -0.20783f, -0.55019f, - -0.13793f, -0.22293f, 0.18305f, 0.12445f, 0.56830f, 0.24567f, 0.09278f, - 0.70803f, 0.35803f, -1.52676f, -0.89624f, 0.77665f, 0.19877f, 0.77175f, - 0.50355f, 0.08592f, -}; - -static const float av1_tx_type_nn_bias_16x16_layer0[16] = { - -1.31834f, 0.14346f, -0.10062f, 0.84489f, 0.95617f, -0.06720f, - -0.68502f, -0.91442f, -0.31932f, 0.25276f, -0.15138f, -1.57661f, - -0.14062f, -0.42120f, 
0.94573f, -0.09287f, -}; - -static const float av1_tx_type_nn_weights_16x16_layer1[64] = { - -1.80333f, -1.06353f, 0.55139f, 0.74644f, 0.13747f, -0.93018f, -0.10286f, - 0.67133f, 0.24460f, 1.44583f, 0.02173f, 0.26037f, -0.73687f, 0.19566f, - 0.61846f, -0.58601f, -1.03196f, -0.74415f, 0.30041f, -0.41967f, 1.08740f, - 0.96224f, -0.59139f, 0.03813f, 0.05403f, 1.33427f, -0.54375f, -1.92181f, - 0.54704f, 0.13608f, 0.22151f, -0.38076f, 1.18390f, -0.77508f, -1.84283f, - 1.00894f, 0.62318f, -0.15296f, 1.27600f, 0.22822f, 0.12751f, 0.93910f, - -0.28502f, 0.53912f, -0.96889f, 0.10182f, 0.81508f, -0.43028f, 2.67386f, - 0.52204f, 0.49820f, -0.41711f, 1.05038f, 1.12192f, 0.74349f, -0.75417f, - -0.03718f, -0.35769f, 0.89651f, 0.63236f, 0.54215f, -0.07894f, 0.48274f, - 1.08829f, -}; - -static const float av1_tx_type_nn_bias_16x16_layer1[4] = { - 0.81986f, - 1.26865f, - 0.11118f, - 2.48404f, -}; - -static const NN_CONFIG av1_tx_type_nnconfig_16x16 = { - 8, // num_inputs - 4, // num_outputs - 1, // num_hidden_layers - { - 16, - }, // num_hidden_nodes - { - av1_tx_type_nn_weights_16x16_layer0, - av1_tx_type_nn_weights_16x16_layer1, - }, - { - av1_tx_type_nn_bias_16x16_layer0, - av1_tx_type_nn_bias_16x16_layer1, - }, -}; -/******************************************************************************/ - -// Tx type model for 4x16 block. 
-static const float av1_tx_type_nn_weights_4x16_hor_layer0[32] = { - 0.36539f, 0.25667f, 0.01491f, -0.21959f, 2.55105f, 0.17615f, 1.79884f, - 1.65936f, -0.44363f, 0.00706f, -0.68004f, -0.64360f, 1.75760f, 1.91906f, - 1.47682f, 0.09650f, -3.59244f, -0.35004f, 0.93295f, 0.25806f, -0.08154f, - 0.79332f, 0.79535f, 1.09467f, 1.57855f, -0.51359f, 0.90553f, -1.67744f, - -1.74563f, -0.88830f, -1.77603f, 2.15935f, -}; - -static const float av1_tx_type_nn_bias_4x16_hor_layer0[8] = { - -0.36435f, -2.22731f, -0.00837f, -1.34546f, - 0.62806f, -0.20675f, 4.91940f, -0.56079f, -}; - -static const float av1_tx_type_nn_weights_4x16_hor_layer1[32] = { - -0.57191f, -1.46418f, 0.67331f, -1.15027f, 0.46288f, 0.81251f, 2.51768f, - -0.27147f, 0.00761f, -2.15214f, -0.69650f, -0.50808f, 0.92832f, 0.45668f, - 2.34201f, -0.52941f, 0.51008f, -1.55496f, -0.01371f, -0.12356f, 0.66624f, - 0.88043f, 2.64862f, -1.28024f, -0.17578f, -1.80034f, -0.32217f, 0.89519f, - 1.28413f, -0.30326f, 2.45329f, -0.83335f, -}; - -static const float av1_tx_type_nn_bias_4x16_hor_layer1[4] = { - 2.33198f, - 3.36245f, - 1.62603f, - 2.91056f, -}; - -static const NN_CONFIG av1_tx_type_nnconfig_4x16_hor = { - 4, // num_inputs - 4, // num_outputs - 1, // num_hidden_layers - { - 8, - }, // num_hidden_nodes - { av1_tx_type_nn_weights_4x16_hor_layer0, - av1_tx_type_nn_weights_4x16_hor_layer1 }, - { av1_tx_type_nn_bias_4x16_hor_layer0, av1_tx_type_nn_bias_4x16_hor_layer1 } -}; - -static const float av1_tx_type_nn_weights_4x16_ver_layer0[128] = { - 1.61392f, 1.41239f, 1.47646f, 1.47325f, 1.46110f, 1.49208f, 1.49414f, - 0.12835f, -0.76986f, 0.07087f, -0.24572f, -0.93168f, 3.07935f, -0.18183f, - -0.09831f, -0.07703f, -0.03222f, -0.25473f, -0.06090f, 2.93713f, -0.38711f, - -0.12884f, -0.18329f, -0.06262f, -0.00327f, -0.02930f, -0.01641f, -0.00622f, - -0.03305f, -4.07069f, -2.76643f, 0.04413f, -1.03176f, -0.19217f, -0.44980f, - -2.48615f, -2.58112f, -0.87695f, 0.16187f, -0.04891f, -0.06854f, 1.08104f, - 0.75245f, 1.49302f, 
0.63363f, 1.45715f, 0.92574f, 1.72029f, 0.33326f, - 3.86646f, 0.04422f, 0.41019f, 0.36212f, 0.56600f, -1.01552f, 0.05128f, - 0.40454f, -1.05100f, -0.47461f, -1.33168f, -0.46145f, -1.36870f, -0.88838f, - -1.05358f, -0.18537f, -0.34357f, -0.03698f, 0.68905f, 0.41010f, 0.31223f, - -0.43382f, -0.74715f, 2.03366f, -0.30419f, 0.45747f, 0.09526f, 0.31678f, - 0.22915f, 0.21832f, 1.26385f, -0.06814f, -0.71417f, -1.18947f, 0.03762f, - 0.10936f, 2.97396f, -0.42638f, -0.03123f, -5.49756f, -0.17029f, -0.11323f, - 0.05173f, -0.44274f, -0.15738f, 0.11311f, 0.43872f, 0.16837f, -0.52849f, - 2.90050f, -0.54735f, -0.29591f, 1.24030f, 0.21696f, -0.04443f, -1.60877f, - -1.36365f, -1.27432f, -1.52060f, -1.34397f, -1.13371f, -1.87554f, 0.80123f, - 0.42820f, -0.14157f, -2.73963f, -0.68040f, -0.35236f, 0.14490f, 2.23477f, - 0.01370f, -0.20426f, -1.51411f, -0.72293f, 0.64516f, 0.97638f, 0.32616f, - -0.27975f, -0.01149f, -}; - -static const float av1_tx_type_nn_bias_4x16_ver_layer0[16] = { - -1.37863f, -0.05763f, -0.07041f, 0.15306f, 0.96026f, -1.42105f, - -0.55822f, 1.04845f, -0.17662f, -1.25345f, -0.11927f, 0.49845f, - -0.32530f, 0.73483f, 0.08322f, -0.23890f, -}; - -static const float av1_tx_type_nn_weights_4x16_ver_layer1[64] = { - 0.27194f, 0.50607f, 0.49229f, -0.48192f, 0.15667f, -1.38891f, 0.38102f, - -0.58825f, -0.07337f, -0.52909f, 0.36975f, 0.28710f, 0.34992f, -0.73630f, - 0.30386f, -0.58822f, 0.36127f, 0.57950f, 0.55878f, -0.42796f, 0.19967f, - -1.45517f, 0.42529f, -0.54630f, -0.38169f, -0.84899f, 0.41622f, 0.46935f, - 0.39077f, -0.75448f, 0.31698f, -0.76187f, 0.97765f, 0.57052f, 0.55825f, - -0.54273f, 0.20466f, -1.46347f, 0.41813f, -0.55019f, -0.19948f, -0.57982f, - 0.41206f, 0.32373f, 0.38537f, -1.11657f, 0.32887f, -0.76911f, 1.12259f, - 0.72163f, 0.82603f, 0.37786f, 0.34976f, -1.86642f, 0.59961f, -0.16329f, - -0.36631f, -0.56814f, 0.60410f, 0.53158f, 0.56389f, -0.70508f, 0.51009f, - -0.56513f, -}; - -static const float av1_tx_type_nn_bias_4x16_ver_layer1[4] = { - 4.60896f, - 
4.53551f, - 4.53124f, - 4.27435f, -}; - -static const NN_CONFIG av1_tx_type_nnconfig_4x16_ver = { - 8, // num_inputs - 4, // num_outputs - 1, // num_hidden_layers - { - 16, - }, // num_hidden_nodes - { av1_tx_type_nn_weights_4x16_ver_layer0, - av1_tx_type_nn_weights_4x16_ver_layer1 }, - { av1_tx_type_nn_bias_4x16_ver_layer0, av1_tx_type_nn_bias_4x16_ver_layer1 } -}; -/******************************************************************************/ - -// Tx type model for 16x4 block. -static const float av1_tx_type_nn_weights_16x4_hor_layer0[128] = { - 1.45347f, -0.15743f, 0.44236f, 0.25808f, 0.33944f, 0.38678f, 0.24428f, - 1.67287f, 0.09539f, -0.42940f, -0.31507f, -0.00154f, -2.98755f, -2.27744f, - -0.49183f, 0.09333f, -0.99026f, -0.22157f, 0.53701f, 0.60447f, 0.15686f, - -0.04646f, 0.26341f, 2.12361f, 0.27090f, -1.14716f, -0.64146f, -0.91604f, - -0.75335f, -0.60056f, -1.25084f, 1.68473f, -3.24075f, -4.03867f, -2.07877f, - -0.02347f, 0.00333f, -0.01259f, -0.00465f, 0.02526f, 0.36286f, -0.10324f, - 2.12780f, -0.74584f, -1.05052f, 1.78467f, -0.55065f, -0.03326f, 2.46781f, - 1.18349f, 0.96015f, 1.01696f, 1.10584f, 1.07263f, 1.11531f, -1.06413f, - 0.32389f, -1.87360f, -0.14435f, 1.77926f, 1.09966f, -0.12680f, -0.61386f, - -0.09724f, -0.33095f, 1.12122f, 1.00791f, 1.52416f, 1.35004f, 1.32657f, - 0.60950f, -1.13538f, -0.38654f, 0.06473f, 2.10669f, 0.27734f, -0.38359f, - -1.91455f, -1.22676f, 0.05786f, 0.97432f, 2.19967f, 0.50457f, 0.78976f, - 0.95183f, -0.32414f, 0.49437f, -0.04506f, 0.18993f, -0.07971f, 0.23889f, - -0.09872f, -0.66036f, 0.05377f, 2.69638f, -0.08259f, -0.69210f, -1.08296f, - -1.96504f, -2.31947f, -0.80161f, -0.80456f, -1.35556f, -0.05323f, -4.42658f, - -0.30732f, -0.12043f, 0.11126f, 0.10771f, -0.14956f, -0.02218f, 0.41016f, - 1.16599f, 1.14629f, 1.12881f, 1.18676f, 1.24677f, 1.28695f, 1.11270f, - 0.08233f, 1.75440f, 0.49228f, -0.34858f, -0.17032f, 0.29288f, 0.47175f, - 0.19055f, -1.56413f, -}; - -static const float av1_tx_type_nn_bias_16x4_hor_layer0[16] 
= { - -1.71227f, 0.47291f, -0.97536f, -0.66216f, 0.11729f, -0.21451f, - 2.75281f, 0.04318f, 2.03965f, 0.14618f, -0.70483f, -0.24517f, - 1.14048f, 0.33308f, -1.10886f, 0.41184f, -}; - -static const float av1_tx_type_nn_weights_16x4_hor_layer1[64] = { - -1.17079f, 0.19096f, -1.05753f, -0.30803f, -1.21680f, -0.67255f, 1.60115f, - 0.05972f, 1.44759f, -0.04068f, -0.26331f, 0.31400f, 0.96923f, 0.33443f, - -0.77215f, -0.91316f, -1.78928f, 0.21483f, -1.24008f, -0.46190f, -0.12127f, - -0.62144f, 1.37593f, 0.08373f, 1.56215f, 0.00279f, -0.14556f, 0.38710f, - 0.96228f, 0.66433f, -0.51798f, -0.80738f, -0.18539f, 0.19377f, -1.03090f, - -1.51044f, -0.59485f, -0.62589f, 1.90742f, 0.09078f, 1.49113f, 0.00205f, - -0.15918f, 0.40827f, 1.08553f, 0.43431f, 0.33519f, -1.12669f, -1.10274f, - 0.80004f, -1.83599f, -0.53134f, 2.00515f, -0.32670f, 1.37124f, 0.51136f, - 1.62563f, 0.24787f, 0.31757f, 0.81751f, 1.57262f, 0.83214f, 1.04661f, - -0.43819f, -}; - -static const float av1_tx_type_nn_bias_16x4_hor_layer1[4] = { - 2.32575f, - 2.75703f, - 1.12304f, - 2.15567f, -}; - -static const NN_CONFIG av1_tx_type_nnconfig_16x4_hor = { - 8, // num_inputs - 4, // num_outputs - 1, // num_hidden_layers - { - 16, - }, // num_hidden_nodes - { av1_tx_type_nn_weights_16x4_hor_layer0, - av1_tx_type_nn_weights_16x4_hor_layer1 }, - { av1_tx_type_nn_bias_16x4_hor_layer0, av1_tx_type_nn_bias_16x4_hor_layer1 } -}; - -static const float av1_tx_type_nn_weights_16x4_ver_layer0[32] = { - 0.26047f, 0.99930f, 1.16484f, -0.28196f, -2.67483f, -0.21456f, -0.16854f, - 0.46375f, 1.47951f, 1.13735f, 1.12356f, 0.27385f, 0.50978f, 2.09967f, - -1.47386f, 0.01950f, -0.06362f, 0.26014f, 1.04544f, -0.03099f, 0.07478f, - -0.39701f, 0.05545f, 2.73633f, -0.56305f, -0.02208f, -0.44517f, -0.00897f, - -0.17967f, -0.96622f, 0.42635f, -1.04784f, -}; - -static const float av1_tx_type_nn_bias_16x4_ver_layer0[8] = { - -0.52088f, 0.52844f, -1.03655f, -0.30974f, - 2.59952f, -1.93604f, 0.00000f, 2.51787f, -}; - -static const float 
av1_tx_type_nn_weights_16x4_ver_layer1[32] = { - 0.10916f, -0.21219f, -0.51340f, 0.69161f, 1.45988f, -1.36942f, -0.40899f, - 1.05136f, -0.08486f, 0.10008f, -0.55304f, 0.88012f, 1.61177f, -1.64507f, - 0.63428f, 1.15130f, -0.17287f, -0.18592f, -0.01143f, 0.88293f, 1.73326f, - -1.63624f, 0.09359f, 1.18393f, 0.26531f, 0.22378f, 0.15170f, 1.06965f, - 1.26814f, -1.93873f, -0.00768f, 1.58309f, -}; - -static const float av1_tx_type_nn_bias_16x4_ver_layer1[4] = { - 2.34713f, - 1.68667f, - 1.25488f, - 1.69812f, -}; - -static const NN_CONFIG av1_tx_type_nnconfig_16x4_ver = { - 4, // num_inputs - 4, // num_outputs - 1, // num_hidden_layers - { - 8, - }, // num_hidden_nodes - { av1_tx_type_nn_weights_16x4_ver_layer0, - av1_tx_type_nn_weights_16x4_ver_layer1 }, - { av1_tx_type_nn_bias_16x4_ver_layer0, av1_tx_type_nn_bias_16x4_ver_layer1 } -}; -/******************************************************************************/ - -// Map tx_size to its corresponding neural net model for tx type prediction. 
-static const NN_CONFIG *av1_tx_type_nnconfig_map_hor[] = { - &av1_tx_type_nnconfig_4x4_hor, // 4x4 transform - &av1_tx_type_nnconfig_8x8_hor, // 8x8 transform - &av1_tx_type_nnconfig_16x16, // 16x16 transform - NULL, // 32x32 transform - NULL, // 64x64 transform - &av1_tx_type_nnconfig_4x8_hor, // 4x8 transform - &av1_tx_type_nnconfig_8x4_hor, // 8x4 transform - &av1_tx_type_nnconfig_8x16_hor, // 8x16 transform - &av1_tx_type_nnconfig_16x8_hor, // 16x8 transform - NULL, // 16x32 transform - NULL, // 32x16 transform - NULL, // 32x64 transform - NULL, // 64x32 transform - &av1_tx_type_nnconfig_4x16_hor, // 4x16 transform - &av1_tx_type_nnconfig_16x4_hor, // 16x4 transform - NULL, // 8x32 transform - NULL, // 32x8 transform - NULL, // 16x64 transform - NULL, // 64x16 transform -}; - -static const NN_CONFIG *av1_tx_type_nnconfig_map_ver[] = { - &av1_tx_type_nnconfig_4x4_ver, // 4x4 transform - &av1_tx_type_nnconfig_8x8_ver, // 8x8 transform - &av1_tx_type_nnconfig_16x16, // 16x16 transform - NULL, // 32x32 transform - NULL, // 64x64 transform - &av1_tx_type_nnconfig_4x8_ver, // 4x8 transform - &av1_tx_type_nnconfig_8x4_ver, // 8x4 transform - &av1_tx_type_nnconfig_8x16_ver, // 8x16 transform - &av1_tx_type_nnconfig_16x8_ver, // 16x8 transform - NULL, // 16x32 transform - NULL, // 32x16 transform - NULL, // 32x64 transform - NULL, // 64x32 transform - &av1_tx_type_nnconfig_4x16_ver, // 4x16 transform - &av1_tx_type_nnconfig_16x4_ver, // 16x4 transform - NULL, // 8x32 transform - NULL, // 32x8 transform - NULL, // 16x64 transform - NULL, // 64x16 transform -}; - -// Tx split model for 4x8 block. 
-static const float av1_tx_split_nn_weights_4x8_layer0[8 * 16] = { - 0.068650f, -0.732073f, -0.040361f, 0.322550f, -0.021123f, 0.212518f, - -0.350546f, 0.435987f, -0.111756f, -0.401568f, 0.069548f, -0.313000f, - 0.073918f, -0.373805f, -0.775810f, -0.124753f, 0.181094f, -0.602641f, - -0.026219f, -0.350112f, 0.020599f, -0.311752f, -0.476482f, -0.669465f, - -0.310921f, 0.348869f, -0.115984f, 0.154250f, 0.200485f, -0.016689f, - 0.020392f, 0.413810f, 0.634064f, -0.627530f, 0.399178f, -0.012284f, - 0.472030f, 0.091087f, -0.706100f, -0.447944f, -0.274226f, 0.445656f, - 0.309339f, 0.505522f, 0.038496f, -0.152809f, 0.408684f, -0.068151f, - 0.271612f, 0.353233f, -0.150365f, 0.075212f, -0.035096f, 0.346615f, - 0.124382f, 0.477072f, 0.216288f, 0.070548f, -0.106362f, 0.681613f, - -0.145502f, -0.218631f, -0.099248f, -0.001983f, -0.196819f, -0.969045f, - 0.063009f, -0.123053f, 0.104875f, -0.137581f, -0.282933f, -0.003624f, - -0.315659f, -0.333523f, -0.503000f, -0.100063f, -0.536711f, -0.059978f, - -0.670248f, -0.353762f, 0.181109f, 0.289715f, -0.071206f, 0.261141f, - 0.052796f, -0.114554f, -0.139214f, -0.261380f, 0.075984f, -0.647925f, - -0.099528f, -0.677814f, 0.015712f, -0.389385f, -0.095622f, -0.165117f, - -0.109454f, -0.175240f, -0.393914f, 0.212330f, 0.037822f, 0.248280f, - 0.180197f, 0.110493f, -0.525727f, -0.092329f, -0.524029f, -0.407364f, - -0.542373f, -0.435626f, -0.912194f, 0.062794f, 0.160433f, 0.741485f, - -0.103659f, -0.119327f, -0.055275f, 0.334358f, 0.014713f, 0.046327f, - 0.831114f, -0.576682f, 0.354369f, -0.082088f, 0.452331f, 0.039730f, - -0.792429f, -0.385862f, -}; - -static const float av1_tx_split_nn_bias_4x8_layer0[16] = { - 0.238621f, 2.186830f, 1.383035f, -0.867139f, 1.257119f, -0.351571f, - -0.240650f, -0.971692f, 2.744843f, 1.116991f, 0.139062f, -0.165332f, - 0.262171f, -1.598153f, -1.427340f, -1.602306f, -}; - -static const float av1_tx_split_nn_weights_4x8_layer1[16] = { - -0.367134f, 1.373058f, -0.897039f, -0.326819f, -0.734030f, -0.290413f, - 
-0.501249f, 0.505321f, -0.537692f, -0.767893f, 0.268697f, 0.278987f, - 0.085082f, 0.614986f, 0.847904f, 0.637578f, -}; - -static const float av1_tx_split_nn_bias_4x8_layer1[1] = { - 0.20586078f, -}; - -static const NN_CONFIG av1_tx_split_nnconfig_4x8 = { - 8, // num_inputs - 1, // num_outputs - 1, // num_hidden_layers - { - 16, - }, // num_hidden_nodes - { - av1_tx_split_nn_weights_4x8_layer0, - av1_tx_split_nn_weights_4x8_layer1, - }, - { - av1_tx_split_nn_bias_4x8_layer0, - av1_tx_split_nn_bias_4x8_layer1, - }, -}; -/******************************************************************************/ - -// Tx split model for 8x8 block. -static const float av1_tx_split_nn_weights_8x8_layer0[144] = { - 0.177983f, -0.938386f, -0.074460f, -0.221843f, -0.073182f, -0.295155f, - -0.098202f, -0.279510f, 0.001054f, -0.119319f, -1.835282f, -0.581507f, - -1.222222f, -1.049006f, -0.807508f, -0.454252f, -0.774879f, -0.180607f, - -0.886976f, -0.231971f, -0.824677f, -0.351872f, -1.323819f, 0.235378f, - 0.015331f, -0.341818f, 0.145549f, -0.348362f, 0.147647f, -0.323400f, - 0.047558f, -0.553025f, -0.295485f, -0.330368f, -0.530605f, -0.407516f, - 0.447740f, 0.782381f, -0.179164f, -0.584675f, -0.052645f, 0.038656f, - -0.096783f, 0.038342f, -0.170762f, -0.405844f, -0.552665f, -0.509866f, - 0.757204f, -1.296465f, 0.631015f, 0.009265f, 0.646192f, 0.044523f, - 0.653161f, 0.033820f, 0.849639f, -0.068555f, -1.036085f, -0.511652f, - 0.104693f, -1.458690f, 0.286051f, -0.089800f, 0.381564f, -0.302640f, - 0.304465f, -0.268706f, 0.432603f, -0.117914f, -2.070031f, -0.565696f, - -0.073027f, -1.783570f, -0.318144f, -0.320990f, -0.343966f, -0.140996f, - -0.322977f, -0.232147f, -0.373210f, -0.158266f, -1.922305f, -0.634373f, - 0.101894f, -0.221847f, 0.018412f, -0.423887f, -0.266684f, -0.444930f, - -0.196237f, 0.106638f, -0.065834f, -0.538401f, -0.280772f, -0.620348f, - 1.089957f, -0.799928f, 0.504112f, -0.165763f, 0.578741f, -0.172653f, - 0.547316f, -0.143484f, 0.717220f, -0.297190f, -1.237854f, 
-0.074819f, - -0.977304f, -0.484092f, -0.646427f, -0.451443f, -0.612126f, -0.224475f, - -0.731608f, -0.257077f, -0.665857f, -0.346742f, -1.216372f, 0.227267f, - 0.231249f, -1.693073f, -0.035899f, 0.380845f, -0.058476f, 0.409405f, - -0.066679f, 0.406731f, -0.068501f, 0.396748f, 0.639462f, 0.150834f, - -0.418659f, -1.421931f, 0.101889f, 0.083573f, 0.129746f, 0.134460f, - 0.081185f, 0.127420f, 0.083664f, 0.051096f, 1.361688f, 0.386093f, -}; - -static const float av1_tx_split_nn_bias_8x8_layer0[12] = { - 4.280443f, 2.218902f, -0.256953f, 3.161431f, 2.082548f, 2.506052f, - 2.563224f, 1.421976f, -1.627813f, -1.436085f, 2.297265f, 1.500469f, -}; - -static const float av1_tx_split_nn_weights_8x8_layer1[12] = { - 1.178833f, -0.428527f, -0.078737f, 0.381434f, -0.466895f, -0.901745f, - -0.766968f, -0.356663f, 0.450146f, 0.509370f, -0.356604f, -0.443506f, -}; - -static const float av1_tx_split_nn_bias_8x8_layer1[1] = { - -0.156294f, -}; - -static const NN_CONFIG av1_tx_split_nnconfig_8x8 = { - 12, // num_inputs - 1, // num_outputs - 1, // num_hidden_layers - { - 12, - }, // num_hidden_nodes - { - av1_tx_split_nn_weights_8x8_layer0, - av1_tx_split_nn_weights_8x8_layer1, - }, - { - av1_tx_split_nn_bias_8x8_layer0, - av1_tx_split_nn_bias_8x8_layer1, - }, -}; -/******************************************************************************/ - -// Tx split model for 8x16 block. 
-static const float av1_tx_split_nn_weights_8x16_layer0[8 * 64] = { - 0.374660f, 0.218905f, -0.139779f, 0.212141f, 0.056517f, 0.051114f, - 0.042860f, -0.273258f, -0.340809f, 0.138983f, -0.216996f, -0.241519f, - -0.123244f, 0.078577f, -0.472273f, -0.194201f, 0.125056f, 0.239761f, - -0.332782f, 0.174782f, -0.211400f, -0.129795f, 0.062195f, 0.113176f, - -0.008869f, 0.140764f, 0.059833f, 0.163826f, 0.359293f, -0.109797f, - -0.022091f, -0.059536f, -0.188226f, 0.179709f, 0.031386f, 0.164790f, - 0.214364f, 0.198555f, 0.152262f, -0.242980f, 0.319367f, -0.136902f, - 0.046524f, -0.043591f, 0.342178f, -0.011757f, -0.014286f, 0.072871f, - -0.278314f, -0.345303f, -0.252103f, -0.107154f, -0.235101f, -0.106739f, - -0.120865f, -0.160042f, 0.240028f, 0.112902f, -0.141587f, -0.703012f, - -0.136591f, 0.318993f, -0.154417f, -0.054668f, 0.192870f, 0.176166f, - -0.029965f, 0.266942f, -0.178384f, 0.038680f, 0.134403f, -0.002426f, - 0.534825f, -0.070923f, 0.413281f, 0.418148f, 0.093729f, 0.016454f, - 0.305358f, -0.040512f, 0.069904f, -0.227588f, -0.362220f, -0.031604f, - -0.394901f, 0.071506f, -0.342833f, -0.142550f, -0.164005f, 0.182600f, - 0.213062f, 0.076805f, 0.278758f, 0.125613f, -0.035552f, 0.040971f, - 0.182785f, -0.227961f, -0.105413f, -0.074949f, -0.084629f, -0.254767f, - 0.114657f, 0.047121f, 0.195902f, 0.264759f, 0.017799f, 0.210230f, - 0.150749f, -0.142142f, 0.182494f, -0.142415f, -0.259782f, -0.114830f, - -0.198826f, 0.000061f, -0.375668f, -0.276656f, -0.373202f, 0.210298f, - 0.422680f, 0.066960f, 0.351106f, -0.209034f, 0.367195f, -0.110274f, - 0.115573f, -0.066642f, -0.389673f, -0.260447f, 0.056949f, -0.180425f, - 0.069922f, -0.153506f, -0.097053f, -0.111757f, 0.094069f, 0.144837f, - -0.052984f, -0.506681f, -0.034474f, 0.279057f, -0.105025f, 0.006656f, - -0.125017f, -0.114096f, 0.103153f, -0.117402f, -0.359472f, 0.072534f, - 0.110291f, 0.003088f, -0.456897f, 0.038331f, -0.322298f, 0.113942f, - -0.119916f, -0.194392f, 0.093167f, 0.193459f, 0.074671f, 0.033602f, - 0.004440f, 
-0.179578f, -0.036637f, -0.216172f, -0.296530f, -0.318992f, - 0.319160f, -0.066218f, 0.291246f, 0.181292f, 0.089914f, 0.025273f, - 0.303128f, 0.019063f, 0.078545f, -0.396919f, 0.014065f, -0.122121f, - 0.037107f, -0.151886f, -0.299392f, -0.172207f, -0.124571f, -0.232553f, - 0.102970f, -0.225040f, 0.061059f, -0.258188f, -0.469871f, -0.099607f, - -0.061524f, -0.213700f, 0.070237f, -0.289134f, -0.238225f, 0.256403f, - -0.119344f, 0.067782f, -0.398983f, -0.123975f, -0.200205f, -0.047038f, - 0.026569f, 0.031037f, 0.094302f, -0.101239f, 0.433307f, -0.303612f, - 0.088537f, -0.164436f, 0.202471f, -0.048592f, -0.251904f, 0.122577f, - -0.309874f, -0.263405f, -0.292503f, 0.216589f, 0.035378f, 0.136599f, - -0.145844f, -0.018211f, 0.174084f, -0.449941f, -0.001428f, 0.064134f, - 0.039652f, 0.111083f, -0.246076f, -0.204733f, 0.056559f, -0.000123f, - 0.104049f, 0.138512f, -0.128309f, 0.087855f, 0.232784f, 0.247138f, - 0.162766f, 0.154829f, 0.313605f, -0.164115f, -0.050844f, 0.156549f, - 0.185279f, -0.238962f, -0.308281f, -0.179592f, -0.193262f, 0.201670f, - -0.203399f, -0.096831f, -0.127867f, 0.310674f, -0.008181f, 0.004078f, - -0.211038f, -0.193480f, -0.185639f, -0.150202f, -0.204858f, -0.240758f, - 0.114268f, -0.032535f, -0.052403f, -0.234333f, -0.064072f, -0.208444f, - -0.352853f, -0.224001f, -0.156330f, 0.215436f, 0.171846f, 0.291849f, - 0.108832f, 0.046991f, -0.127801f, 0.032485f, 0.141493f, 0.123319f, - -0.057250f, 0.315346f, -0.061317f, -0.465086f, -0.130179f, -0.217841f, - -0.239089f, -0.073251f, -0.327718f, 0.054905f, -0.283169f, -0.028900f, - 0.071450f, 0.270072f, 0.248891f, 0.088052f, 0.253319f, 0.122808f, - 0.175490f, -0.147805f, 0.089169f, -0.045457f, -0.330788f, 0.099791f, - -0.137376f, -0.195977f, -0.350942f, -0.284930f, -0.559037f, 0.030504f, - 0.162554f, -0.199100f, -0.050453f, -0.131320f, -0.077863f, -0.066253f, - -0.379723f, -0.424047f, -0.081182f, -0.252261f, -0.102815f, 0.058240f, - -0.182036f, 0.176772f, -0.070823f, 0.216054f, -0.211533f, -0.232992f, - 
0.279346f, 0.117984f, 0.236674f, 0.126625f, -0.046220f, 0.044919f, - 0.278492f, 0.083944f, 0.180512f, 0.217994f, 0.401170f, -0.064417f, - 0.011636f, -0.139597f, -0.050020f, -0.268438f, -0.032803f, 0.024908f, - -0.085713f, -0.012984f, -0.055192f, -0.338657f, 0.045826f, -0.312849f, - -0.023393f, -0.168800f, -0.030886f, -0.131816f, -0.253542f, -0.104812f, - -0.354389f, 0.169464f, 0.094151f, -0.217122f, -0.456397f, 0.211478f, - 0.219232f, -0.155519f, -0.353700f, -0.264759f, -0.034709f, 0.034409f, - -0.148639f, -0.132850f, -0.216791f, -0.118492f, 0.173721f, -0.144181f, - 0.335028f, 0.176439f, 0.105980f, 0.169390f, 0.155615f, -0.040618f, - -0.176029f, 0.155569f, -0.184833f, -0.171099f, -0.178663f, -0.032051f, - -0.434334f, 0.092238f, -0.263103f, 0.061804f, -0.172957f, 0.005962f, - -0.100176f, 0.125898f, 0.048092f, -0.088141f, 0.247196f, -0.221601f, - -0.114474f, -0.124410f, -0.156393f, -0.181782f, -0.083562f, 0.034937f, - 0.403401f, -0.046200f, 0.322259f, 0.219678f, 0.109850f, 0.051837f, - 0.196861f, -0.019118f, 0.248818f, -0.137567f, 0.127862f, 0.052293f, - 0.298726f, 0.275788f, 0.015344f, 0.058714f, 0.283691f, -0.053794f, - -0.123270f, -0.227761f, -0.141744f, -0.268515f, -0.007189f, -0.242117f, - -0.252396f, -0.069017f, 0.034803f, -0.003388f, -0.262577f, 0.062115f, - -0.298393f, 0.215415f, -0.153615f, 0.289902f, 0.085886f, -0.504290f, - 0.077178f, 0.150861f, -0.228848f, -0.261020f, 0.198204f, 0.162113f, - 0.346418f, -0.286950f, 0.354756f, -0.226419f, 0.024720f, 0.208037f, - 0.107286f, -0.110849f, 0.104415f, -0.207725f, 0.063932f, -0.037748f, - -0.167037f, -0.068282f, 0.320815f, -0.051884f, 0.099989f, -0.078388f, - 0.127071f, 0.046675f, -0.336571f, -0.273080f, 0.264694f, -0.007352f, - -0.093828f, 0.094773f, -0.144434f, 0.091795f, -0.031615f, 0.056914f, - 0.064673f, -0.136669f, 0.344734f, 0.225926f, 0.283451f, -0.068354f, - 0.030572f, 0.180784f, -0.378047f, -0.092962f, -0.083291f, 0.038970f, - 0.052094f, -0.017932f, 0.216302f, -0.184396f, 0.079888f, 0.210406f, - 
-0.020627f, 0.244744f, 0.336972f, -0.182914f, -0.220976f, -0.304225f, - -0.330974f, -0.370868f, -0.084935f, -0.136489f, -0.210082f, -0.188088f, - -0.408768f, 0.184693f, -}; - -static const float av1_tx_split_nn_bias_8x16_layer0[64] = { - -0.274107f, 0.445751f, 0.234359f, 0.291593f, 0.163298f, 0.183707f, - -0.548839f, -0.190779f, -0.163346f, -0.669028f, 0.399209f, -0.354974f, - 0.000000f, -0.254630f, 0.220149f, 0.371104f, 0.789759f, 0.270300f, - 0.195126f, -0.206958f, 0.917708f, -0.256232f, 1.131933f, 1.178944f, - 0.461270f, 0.246169f, -0.818614f, -0.111986f, 0.759355f, 0.154889f, - 0.470299f, -1.025250f, 0.678678f, 0.959346f, -0.164105f, 0.544079f, - -0.448733f, 0.649221f, -0.536672f, 0.962758f, -0.256427f, 0.808664f, - -0.118694f, 0.684873f, -0.015635f, -0.046469f, 0.075481f, 0.412647f, - 0.454456f, -0.107169f, 0.775235f, -0.261629f, -1.194849f, 0.010093f, - -0.231289f, 0.658286f, -0.769320f, 0.564545f, 0.482962f, -0.131378f, - -0.255844f, -0.078400f, 0.476752f, 0.643001f, -}; - -static const float av1_tx_split_nn_weights_8x16_layer1[64] = { - -0.145065f, -0.145101f, 0.174786f, 0.196692f, 0.102025f, -0.087735f, - 0.386353f, -0.660539f, -0.183940f, 0.490045f, -0.276404f, -0.145669f, - 0.209846f, -0.085574f, -0.156821f, -0.377450f, -0.950010f, 0.450709f, - -0.108545f, -0.261181f, 1.435606f, -0.176621f, -1.158548f, 2.035680f, - 0.218069f, -0.138629f, 0.305958f, -0.277194f, -0.602468f, 0.203873f, - 0.120720f, 0.216095f, -0.434502f, -0.579746f, -0.239450f, 0.755529f, - 0.545643f, 0.232091f, 0.330169f, 0.988136f, -0.070465f, -0.345584f, - -0.162455f, -0.617064f, 0.123881f, -0.201098f, 0.222756f, 0.112932f, - 0.048647f, -0.147890f, 0.394584f, -0.262148f, 0.280564f, -0.195432f, - -0.047515f, 1.133410f, 0.255415f, -0.299032f, -0.397807f, -0.153246f, - -0.256734f, 0.177370f, 0.213522f, -0.530158f, -}; - -static const float av1_tx_split_nn_bias_8x16_layer1[1] = { - 0.14910713f, -}; - -static const NN_CONFIG av1_tx_split_nnconfig_8x16 = { - 8, // num_inputs - 1, // 
num_outputs - 1, // num_hidden_layers - { - 64, - }, // num_hidden_nodes - { - av1_tx_split_nn_weights_8x16_layer0, - av1_tx_split_nn_weights_8x16_layer1, - }, - { - av1_tx_split_nn_bias_8x16_layer0, - av1_tx_split_nn_bias_8x16_layer1, - }, -}; -/******************************************************************************/ - -// Tx split model for 16x16 block. -static const float av1_tx_split_nn_weights_16x16_layer0[12 * 24] = { - -0.177215f, -0.297166f, 0.299924f, 0.207878f, 0.216871f, 0.173264f, - 0.295464f, 0.048395f, 0.154731f, 0.305880f, 0.056787f, -0.166617f, - 0.115653f, -0.529477f, -0.073995f, -0.211746f, -0.018169f, 0.000788f, - -0.024940f, -0.007055f, 0.001392f, 0.021678f, -1.594600f, -0.099593f, - 0.332930f, 0.103574f, 0.158249f, 0.182601f, 0.332665f, 0.226207f, - -0.139566f, 0.185531f, 0.099074f, -0.185654f, -0.203121f, -0.285678f, - -0.313453f, -0.294452f, -0.143707f, -0.031265f, -0.453030f, -0.061874f, - -0.066150f, -0.099058f, -0.458879f, 0.127544f, 0.338314f, -0.161350f, - 0.030091f, -0.075528f, 0.004320f, 0.353690f, -0.013480f, -0.420402f, - -0.004659f, -0.329401f, -0.001745f, 0.227384f, -0.055183f, 0.121405f, - 0.160340f, 0.143603f, -0.221813f, 0.079107f, -0.657639f, -0.084348f, - -0.303414f, 0.046774f, -0.367679f, 0.060005f, 0.168645f, 0.084421f, - -0.133625f, 0.301375f, 0.079412f, -0.419303f, 0.017235f, 0.068637f, - 0.018384f, -0.428325f, -0.019753f, 0.149444f, -0.474836f, -0.287162f, - 0.198083f, 0.028292f, -0.299092f, -0.005849f, -0.256245f, 0.233277f, - -0.217561f, -0.264003f, 0.269411f, 0.207032f, -0.339411f, -0.198431f, - -0.028521f, 0.158076f, 0.177116f, 0.345702f, -0.145132f, 0.064623f, - -0.090867f, 0.288816f, -0.263198f, -0.071028f, -0.044546f, 0.380017f, - -0.014100f, -0.271192f, -0.318559f, 0.129015f, -0.050314f, -0.093355f, - -0.578498f, 0.099090f, -0.133080f, -0.029975f, -0.059828f, -0.157765f, - -0.321153f, -0.343671f, -0.242959f, 0.128304f, 0.017170f, 0.072787f, - -0.475838f, -0.003806f, -0.068615f, 0.150556f, -0.159903f, 
-0.416513f, - 0.218794f, -0.290456f, -0.084569f, -0.170014f, -0.044414f, -0.153069f, - -0.077329f, -0.089747f, -0.096526f, 0.537952f, 0.134725f, -0.006469f, - -0.323335f, -0.168183f, -0.107163f, -0.139954f, 0.011286f, -0.021712f, - -0.513992f, 0.259135f, -0.319808f, 0.077811f, 0.104613f, 0.370571f, - 0.185244f, 0.065530f, -0.091098f, -0.573741f, 0.111934f, 0.437417f, - -0.123691f, 0.220641f, -0.024783f, -0.149460f, -0.354185f, -0.134127f, - 0.038015f, -0.380596f, 0.250980f, 0.142208f, 0.135170f, -0.131129f, - -0.357556f, -0.530945f, 0.159672f, -0.147025f, -0.377829f, -0.504508f, - -0.492870f, 0.020753f, 0.142818f, 0.025172f, 0.086140f, 0.091283f, - 0.087491f, -0.186415f, 0.177785f, -0.195121f, -1.191148f, -0.477102f, - 0.023371f, 0.227004f, -0.023502f, -0.242913f, -0.074398f, -0.153480f, - 0.162900f, 0.415509f, -0.162565f, -0.131709f, -0.258852f, -0.252027f, - -0.080845f, -0.330274f, 0.021874f, 0.232398f, 0.069277f, 0.220567f, - -0.024237f, -0.366771f, 0.081673f, -0.429906f, -0.302170f, 0.061045f, - 0.352777f, -0.230376f, 0.408153f, 0.064758f, 0.142051f, 0.007219f, - 0.622878f, 0.212577f, 0.036489f, 0.081150f, -0.284767f, 0.107763f, - -0.529786f, -0.072190f, -0.300421f, -0.287959f, -0.568900f, 0.011547f, - -0.131696f, -0.356854f, -0.587962f, -0.026598f, 0.405829f, 0.057565f, - 0.414265f, -0.159155f, 0.221456f, 0.146314f, 0.265776f, -0.006516f, - 0.473978f, -0.186431f, 0.288672f, -0.060437f, 0.083380f, -0.205641f, - 0.360016f, 0.222041f, 0.420011f, 0.024579f, 0.377546f, 0.250380f, - -0.069900f, 0.296743f, 0.073532f, -0.243225f, -0.374987f, -0.387288f, - -0.237255f, -0.287013f, 0.417831f, -0.252988f, -0.257652f, -0.066775f, - -0.253926f, 0.057841f, 0.346133f, -0.157797f, -0.406028f, -0.286893f, - 0.274507f, -0.452561f, 0.143381f, -0.097755f, 0.021242f, 0.034561f, - 0.044115f, 0.004065f, 0.066729f, 0.043558f, 0.102991f, -0.477574f, -}; - -static const float av1_tx_split_nn_bias_16x16_layer0[24] = { - -0.479033f, 1.467402f, -0.366291f, 0.372511f, 0.715322f, -0.605500f, 
- 0.176848f, 0.032318f, 0.237429f, -0.046047f, 0.452082f, 0.451805f, - -0.822845f, 0.636762f, -0.057350f, 1.163978f, 0.728287f, 0.603654f, - -0.245519f, -0.893569f, -1.428185f, 0.808870f, -0.076159f, 1.231976f, -}; - -static const float av1_tx_split_nn_weights_16x16_layer1[24] = { - -0.176161f, 1.670188f, -0.180755f, -0.321326f, 0.249728f, -0.170504f, - -0.538432f, 0.033893f, 0.149842f, 0.404140f, -0.377812f, 0.338838f, - -0.176091f, 0.249844f, -0.362533f, 1.412460f, 0.196862f, 0.278194f, - -0.140444f, 0.297746f, 0.172533f, 0.116470f, -0.151656f, -0.603250f, -}; - -static const float av1_tx_split_nn_bias_16x16_layer1[1] = { - 0.184803f, -}; - -static const NN_CONFIG av1_tx_split_nnconfig_16x16 = { - 12, // num_inputs - 1, // num_outputs - 1, // num_hidden_layers - { - 24, - }, // num_hidden_nodes - { - av1_tx_split_nn_weights_16x16_layer0, - av1_tx_split_nn_weights_16x16_layer1, - }, - { - av1_tx_split_nn_bias_16x16_layer0, - av1_tx_split_nn_bias_16x16_layer1, - }, -}; -/******************************************************************************/ - -// Tx split model for 32x32 block. 
-static const float av1_tx_split_nn_weights_32x32_layer0[12 * 32] = { - -0.439303f, 0.004813f, -0.365052f, -0.116868f, -0.356716f, -0.196537f, - -0.196770f, -0.076096f, 0.357004f, -0.044909f, -0.112910f, -0.129081f, - 0.156725f, -0.386346f, 0.038971f, 0.160696f, 0.204923f, -0.384333f, - -0.319546f, 0.028179f, -0.250524f, -0.289669f, -0.284138f, -0.258963f, - -0.180854f, -0.000807f, -0.029620f, -0.353134f, 0.212408f, 0.141414f, - 0.303016f, 0.098066f, 0.482455f, 0.036069f, -0.166279f, 0.210119f, - -0.086337f, -0.023550f, -0.250796f, -0.183945f, -0.393856f, 0.170608f, - -0.306403f, 0.026318f, -0.277296f, 0.092684f, -0.033584f, -0.018371f, - -0.025043f, -0.257659f, -0.139163f, -0.206949f, -0.190105f, 0.028053f, - 0.361851f, -0.364726f, -0.096771f, -0.184166f, -0.433228f, -0.182191f, - -0.097051f, 0.259172f, 0.016432f, 0.259358f, 0.145059f, 0.037196f, - 0.091581f, -0.219644f, 0.140384f, -0.446837f, -0.234531f, 0.149508f, - -0.083429f, 0.186189f, -0.099890f, -0.111277f, 0.495214f, 0.085053f, - -0.266613f, -0.051366f, 0.148593f, 0.111875f, 0.077787f, -0.371653f, - -0.146157f, -0.229235f, 0.076203f, 0.488975f, 0.096771f, -0.009483f, - 0.192985f, 0.246273f, -0.192671f, -0.557890f, -0.292650f, -0.088907f, - -0.106892f, -0.329659f, 0.012105f, -0.359326f, 0.170723f, -0.004357f, - 0.171593f, -0.478768f, -0.236016f, -0.035077f, 0.133731f, 0.137962f, - -0.397926f, -0.155164f, -0.276709f, -0.186602f, -0.258301f, 0.036965f, - -0.649359f, 0.127605f, 0.097930f, 0.182775f, -0.313324f, 0.053349f, - 0.204203f, -0.222948f, -0.059008f, -0.049759f, -0.056848f, 0.087497f, - -0.039987f, -0.055042f, -0.041623f, -0.078424f, -0.317291f, -0.191398f, - 0.632147f, 0.221825f, 0.268394f, -0.096357f, 0.442545f, -0.007117f, - -0.036125f, 0.000525f, 0.088092f, -0.203653f, 0.086925f, 0.439141f, - 0.329889f, -0.370050f, -0.194306f, -0.207430f, 0.132779f, -0.217614f, - -0.039444f, -0.053019f, -0.260725f, -0.116563f, -0.271048f, 0.283737f, - -0.007300f, 0.062257f, -0.347865f, -0.296767f, -0.359123f, 
0.230459f, - -0.189117f, -0.087622f, -0.561091f, 0.184182f, -0.044980f, 0.012643f, - 0.241672f, 0.050272f, -0.204851f, -0.159285f, -0.064081f, -0.118666f, - -0.269471f, 0.231668f, 0.135749f, -0.131162f, 0.062760f, 0.100949f, - 0.074967f, -0.056918f, 0.251707f, 0.034098f, 0.341290f, -0.105027f, - 0.313246f, -0.092679f, -0.014632f, -0.390967f, 0.136881f, -0.241554f, - 0.097674f, 0.110832f, -0.390245f, 0.017654f, -0.506222f, 0.065252f, - 0.244834f, -0.171352f, -0.331702f, 0.111043f, 0.125217f, -0.058116f, - -0.382595f, -0.052545f, 0.114261f, -0.493617f, 0.243984f, -0.171053f, - 0.165009f, -0.063020f, 0.096502f, 0.341339f, -0.013443f, 0.056372f, - 0.339284f, 0.398376f, 0.389409f, 0.257252f, 0.517368f, 0.078856f, - 0.087716f, -0.171092f, 0.227461f, 0.125307f, -0.054423f, -0.143161f, - 0.224041f, -0.086477f, -0.092548f, 0.072392f, -0.061608f, 0.258347f, - 0.147033f, -0.478244f, -0.204869f, 0.038552f, -0.144563f, 0.224087f, - -0.296705f, 0.153889f, -0.064624f, 0.085265f, -0.103826f, 0.127971f, - 0.019965f, 0.111937f, -0.074187f, -0.029518f, -0.127305f, -0.012210f, - 0.042714f, 0.070052f, -0.202360f, 0.348144f, -0.132097f, -0.209585f, - -0.248286f, -0.065774f, -0.089482f, -0.133226f, 0.325430f, -0.013468f, - -0.406090f, -0.144936f, 0.208620f, 0.343445f, -0.059639f, 0.114857f, - -0.069431f, -0.218725f, 0.190575f, -0.368101f, 0.030030f, 0.062815f, - -0.239369f, -0.537852f, 0.022487f, 0.023038f, 0.190788f, 0.040123f, - -0.004304f, 0.060749f, -0.108929f, 0.136796f, -0.542875f, -0.227074f, - -0.182244f, 0.082559f, 0.019149f, 0.178854f, 0.120284f, 0.009070f, - 0.068268f, -0.544822f, 0.120536f, 0.354028f, -0.119890f, -0.122055f, - -0.405335f, 0.122341f, -0.304412f, 0.062405f, -0.302568f, -0.276505f, - -0.120915f, -0.221841f, 0.282007f, -0.253971f, 0.059517f, -0.144976f, - 0.149391f, -0.047355f, -0.167742f, -0.392333f, -0.041132f, 0.342135f, - 0.017485f, 0.021038f, -0.023728f, -0.192181f, -0.103996f, 0.092873f, - -0.114365f, -0.397732f, -0.065421f, 0.053084f, 0.035201f, 0.053019f, 
- -0.105377f, -0.039500f, 0.131904f, -0.123911f, -0.390328f, -0.125198f, - -0.000126f, 0.014864f, -0.220187f, 0.084056f, -0.492155f, -0.164979f, - 0.133592f, 0.121519f, -0.240813f, 0.186680f, 0.118673f, 0.235006f, - -0.239894f, -0.185759f, -0.336992f, 0.209620f, -0.298845f, 0.127803f, - -0.083992f, 0.194340f, -0.245378f, 0.212308f, 0.142512f, -0.163324f, - 0.383495f, 0.291065f, 0.286620f, -0.239957f, 0.225127f, -0.174424f, - 0.297231f, -0.045434f, 0.156444f, -0.184273f, -0.204567f, 0.202551f, - 0.370019f, -0.073910f, 0.344897f, 0.063100f, 0.338547f, -0.099145f, - 0.391863f, -0.214244f, -0.241734f, -0.281851f, -0.035133f, -0.153157f, -}; - -static const float av1_tx_split_nn_bias_32x32_layer0[32] = { - 0.143343f, -0.021982f, -0.314939f, 0.170867f, -0.081248f, 0.125758f, - -0.355762f, 0.279798f, 1.027712f, -0.434660f, 1.072005f, 0.668893f, - -0.031216f, -0.528650f, 0.328349f, 0.543645f, -0.188810f, 0.221110f, - -1.638637f, 0.058045f, -1.731105f, -0.444284f, 0.513693f, 0.890025f, - 0.160288f, 0.393312f, 0.332856f, -0.080767f, 0.299822f, 0.235876f, - 0.254942f, -0.017796f, -}; - -static const float av1_tx_split_nn_weights_32x32_layer1[32] = { - -0.090326f, -0.267553f, -0.026071f, 0.100912f, 0.279137f, 0.079064f, - -0.074885f, 0.053804f, 0.736810f, -0.031693f, -0.970514f, 0.174069f, - 0.095940f, -0.065047f, 0.052911f, 0.176728f, -0.058274f, 0.148364f, - -0.162210f, 0.093875f, -0.367663f, 0.020876f, 0.137280f, -1.099116f, - 0.146854f, 0.075590f, 0.228534f, 0.141993f, 0.072143f, 0.101421f, - -0.068547f, -0.154148f, -}; - -static const float av1_tx_split_nn_bias_32x32_layer1[1] = { - 0.316622f, -}; - -static const NN_CONFIG av1_tx_split_nnconfig_32x32 = { - 12, // num_inputs - 1, // num_outputs - 1, // num_hidden_layers - { - 32, - }, // num_hidden_nodes - { - av1_tx_split_nn_weights_32x32_layer0, - av1_tx_split_nn_weights_32x32_layer1, - }, - { - av1_tx_split_nn_bias_32x32_layer0, - av1_tx_split_nn_bias_32x32_layer1, - }, -}; 
-/******************************************************************************/ - -// Tx split model for 64x64 block. -static const float av1_tx_split_nn_weights_64x64_layer0[12 * 32] = { - -0.006828f, 0.149944f, -0.017614f, -0.044599f, -0.024517f, 0.507698f, - 0.001039f, 0.037164f, 0.015091f, -0.306620f, -0.162047f, -0.369440f, - 0.396310f, 0.087121f, 0.208609f, -0.083068f, 0.493774f, 0.217682f, - 0.377393f, 0.172879f, 0.397422f, 0.078919f, 0.741350f, 0.064169f, - -0.099989f, -0.192983f, -0.278230f, -0.310048f, -0.439965f, -0.226698f, - -0.436596f, -0.007551f, -0.396721f, 0.153570f, -0.190838f, -0.071869f, - 0.048799f, -0.301301f, -0.005015f, 0.500480f, -0.030622f, -0.559095f, - -0.032634f, -0.054160f, -0.056979f, -0.456545f, 0.306536f, -0.411323f, - -0.005366f, -0.069496f, 0.019990f, 0.327931f, -0.002516f, 0.393190f, - 0.001759f, 0.035093f, -0.030302f, -0.528984f, 0.174781f, 0.241462f, - -0.415427f, -0.164502f, 0.143065f, -0.122595f, 0.082049f, -0.143346f, - 0.055642f, -0.124701f, 0.004050f, -0.216235f, -2.681730f, 0.101658f, - 0.381239f, 0.465936f, 0.331154f, 0.301708f, -0.360171f, 0.054886f, - -0.118658f, 0.287921f, 0.277859f, 0.203784f, 0.247809f, 0.656924f, - -0.354628f, 0.315081f, 0.105108f, -0.510179f, 0.059267f, 0.061386f, - 0.076423f, 0.347119f, 0.100134f, 0.028402f, -0.118621f, -0.238689f, - 0.080141f, -0.138863f, 0.009009f, -0.100526f, -0.138875f, 0.066992f, - 0.005949f, 0.564336f, 0.046994f, 0.004655f, 0.366047f, 0.014695f, - -0.146928f, -0.024665f, -0.440357f, -0.109395f, 0.527231f, -0.020925f, - -0.227236f, -0.068141f, 0.282009f, 0.040192f, -0.267100f, 0.229228f, - 0.133861f, 0.338706f, -0.030178f, -0.040919f, -0.026343f, -0.330338f, - -0.066931f, -0.110580f, -0.072056f, 0.599457f, -0.020738f, 0.169200f, - 0.836240f, -0.157548f, 0.386273f, 0.002404f, 0.329410f, -0.007020f, - 0.351705f, -0.041259f, 0.388861f, 0.003899f, 0.582627f, 0.023572f, - 0.409912f, -0.158472f, 0.536383f, 0.525093f, 0.604247f, 0.439159f, - 0.692832f, 0.046272f, 0.590367f, 
-0.082166f, 0.262357f, 0.478671f, - 0.031935f, 0.042675f, 0.120002f, 0.398616f, -0.078967f, 0.227986f, - -0.044679f, 0.151061f, -0.085564f, 0.220205f, -0.265606f, -0.203623f, - 0.204719f, -0.125922f, 0.038544f, -0.269379f, 0.025866f, 0.109967f, - 0.019064f, -0.237297f, -0.309746f, -0.329118f, -0.278368f, -0.063859f, - 0.278496f, 0.018620f, 0.209971f, 0.296250f, 0.142850f, 0.288689f, - 0.137084f, 0.130517f, 0.128171f, -0.155396f, -0.008449f, -0.099845f, - 0.173455f, -0.059909f, -0.147318f, 0.102851f, -0.251389f, -0.001448f, - 0.103907f, 0.297273f, -0.027846f, 0.028260f, -0.382601f, 0.346695f, - -0.601641f, 0.162366f, -0.477495f, -0.042731f, -0.387871f, -0.051791f, - -0.401498f, -0.048446f, -0.456270f, -0.062287f, 0.493919f, 0.003008f, - 0.099917f, -0.358525f, -0.094903f, -0.022811f, -0.062259f, 0.019455f, - -0.050644f, 0.020041f, -0.132912f, -0.061578f, -3.083691f, -0.014961f, - -0.129115f, -0.710559f, 0.157213f, -0.844037f, -0.121991f, -0.943386f, - -0.231269f, -0.003462f, 0.331478f, -0.132703f, -1.285993f, -0.120957f, - -0.373755f, -0.322609f, 0.309059f, -0.131523f, -0.118334f, -0.063805f, - -0.104251f, 0.012166f, -0.094699f, -0.283753f, 0.128168f, -0.526929f, - -0.050331f, 0.186153f, 0.005913f, -0.221236f, 0.036363f, 0.160909f, - -0.001342f, -0.382749f, 0.037820f, 0.281689f, -0.024275f, 0.028854f, - 0.318291f, 0.318526f, 0.035778f, 0.034031f, 0.189663f, -0.293367f, - 0.082022f, 0.127923f, 0.078866f, -0.081361f, -0.268117f, 0.246675f, - 0.248605f, -0.215479f, -0.073084f, 0.496140f, -0.067327f, 0.396237f, - -0.120739f, 0.033752f, -0.044120f, -0.218941f, -0.028078f, 0.195132f, - -0.040400f, 0.281604f, -0.100471f, 0.415207f, -0.258503f, -0.429749f, - 0.150569f, -0.010859f, 0.136448f, 0.026589f, 0.148466f, 0.110764f, - 0.380967f, 0.009177f, 0.103075f, 0.116417f, 0.226273f, -0.327746f, - 0.169346f, 0.284553f, -0.094986f, 0.312745f, -0.147840f, 0.025062f, - -0.494482f, 0.112388f, -0.213962f, 0.107050f, -0.433371f, -0.096276f, - -0.244835f, -0.003518f, -0.459148f, 
-0.145080f, 0.017150f, 0.042846f, - -0.237479f, 0.104746f, 0.158677f, 0.358937f, 0.099921f, 0.277109f, - 0.012410f, -0.062897f, 0.116130f, 0.255309f, 0.341628f, 0.145002f, - -0.429344f, -0.016433f, -0.068985f, 0.285194f, -0.286719f, -0.018298f, - -0.179369f, -0.194655f, -0.165380f, 0.026071f, -0.428268f, -0.379929f, - -0.727543f, 0.179610f, -0.963979f, -0.042026f, -0.616202f, 0.133401f, - -0.784966f, 0.061205f, -0.713357f, 0.129795f, 0.120512f, -0.339545f, - 0.353557f, 0.114906f, -0.329813f, -0.209987f, 0.085410f, 0.214313f, - -0.122082f, 0.335770f, -0.020937f, 0.202456f, 0.289023f, -0.421186f, - 0.337905f, 0.407663f, 0.132771f, 0.071734f, 0.213914f, 0.128595f, - 0.302659f, -0.209501f, 0.217756f, 0.253079f, -0.089505f, -0.205614f, -}; - -static const float av1_tx_split_nn_bias_64x64_layer0[32] = { - 0.296914f, -1.826816f, 0.346130f, 0.969520f, -0.528154f, 1.175862f, - -0.075985f, -0.097323f, -0.233059f, 0.004846f, 0.401279f, -2.272435f, - 0.086257f, 0.414162f, -0.194786f, -0.233887f, -0.113215f, -2.453546f, - 0.861214f, 0.298361f, 0.267397f, -0.158557f, -0.119911f, -0.098134f, - -0.339263f, 0.385871f, -0.678123f, 0.263218f, 0.251611f, -1.155773f, - -0.365437f, 0.229255f, -}; - -static const float av1_tx_split_nn_weights_64x64_layer1[32] = { - 0.502104f, -0.708023f, 0.419648f, 1.583418f, 0.419355f, -1.462981f, - -0.439623f, 0.405691f, 0.823257f, 0.061654f, 0.750875f, 0.775031f, - -0.387909f, 0.447385f, 0.284690f, 0.353262f, -0.224347f, 0.832864f, - -1.708491f, -1.042447f, -0.272829f, 0.540640f, 0.310509f, 0.723745f, - 0.245592f, -0.218417f, -0.597987f, -0.362301f, 0.702217f, -0.692614f, - 0.207812f, 0.513560f, -}; - -static const float av1_tx_split_nn_bias_64x64_layer1[1] = { -0.2307045f }; - -static const NN_CONFIG av1_tx_split_nnconfig_64x64 = { - 12, // num_inputs - 1, // num_outputs - 1, // num_hidden_layers - { - 32, - }, // num_hidden_nodes - { - av1_tx_split_nn_weights_64x64_layer0, - av1_tx_split_nn_weights_64x64_layer1, - }, - { - 
av1_tx_split_nn_bias_64x64_layer0, - av1_tx_split_nn_bias_64x64_layer1, - }, -}; -/******************************************************************************/ - -// Tx split model for 4x16 block. -static const float av1_tx_split_nn_weights_4x16_layer0[8 * 16] = { - -1.344184f, -1.454625f, -0.703110f, -0.140570f, -0.841536f, -0.068131f, - -2.128968f, -0.655518f, 0.432180f, 0.879752f, -0.222211f, 0.061615f, - -0.230969f, 0.569496f, 1.424188f, 0.598063f, -0.436005f, -0.737606f, - -0.137875f, -0.085730f, -0.076512f, -0.583101f, -0.937377f, -0.203556f, - -0.215797f, -0.015361f, -0.124098f, -0.411917f, 0.340441f, -0.331752f, - -0.472607f, -0.097714f, -0.930572f, -1.354713f, -0.550724f, 0.176212f, - -0.636060f, 0.183271f, -0.610212f, 0.345895f, -1.100906f, -1.605713f, - 0.111888f, -0.140937f, 0.063013f, -0.013315f, -0.273472f, -0.255870f, - 1.200328f, 0.274002f, 1.005776f, 0.322392f, 1.222373f, 0.158227f, - 0.408810f, 0.145022f, 0.139842f, -1.249412f, 0.286672f, -0.635699f, - 0.312562f, -0.495606f, -1.117034f, -0.085107f, -0.097484f, -0.341521f, - -0.132199f, -0.863055f, 0.217579f, -1.161425f, -0.302087f, -1.357271f, - -0.520724f, -1.211069f, -1.048729f, -0.333087f, -1.171527f, -0.280824f, - -2.057684f, -0.228755f, 0.606278f, 0.101198f, -0.314847f, -1.303255f, - -0.294964f, 1.301923f, 0.041712f, 0.077593f, -1.152746f, 0.495315f, - -0.751566f, 0.230249f, -0.840661f, 0.100731f, 1.346269f, 0.649898f, - -1.432258f, -0.456710f, -1.018123f, -0.348559f, -1.225226f, -0.170717f, - -0.354072f, 0.068292f, -0.234168f, 0.277503f, 0.179134f, 0.907420f, - 0.354626f, -0.627210f, 0.905779f, 0.512612f, 0.161190f, -0.843177f, - 0.014953f, -0.354983f, 0.011116f, -0.429598f, -1.017138f, -0.211432f, - 0.941840f, -0.281747f, 0.957776f, -0.541914f, 1.041880f, -0.433580f, - -1.416451f, -0.166467f, -}; - -static const float av1_tx_split_nn_bias_4x16_layer0[16] = { - 3.086118f, -3.235095f, 4.830956f, -0.165706f, 0.955031f, 4.055783f, - -0.311489f, 4.660205f, -0.576277f, -0.248111f, -0.790519f, 
-1.686412f, - -1.191704f, -3.800073f, 4.121552f, -1.399397f, -}; - -static const float av1_tx_split_nn_weights_4x16_layer1[16] = { - -0.758677f, 0.388776f, 0.439906f, 0.011390f, -0.084319f, -0.667969f, - -0.467316f, -0.875491f, -0.160668f, 0.805292f, 0.114393f, -0.549682f, - 0.462109f, 0.343315f, 1.092593f, 0.483152f, -}; - -static const float av1_tx_split_nn_bias_4x16_layer1[1] = { - 0.8205083f, -}; - -static const NN_CONFIG av1_tx_split_nnconfig_4x16 = { - 8, // num_inputs - 1, // num_outputs - 1, // num_hidden_layers - { - 16, - }, // num_hidden_nodes - { - av1_tx_split_nn_weights_4x16_layer0, - av1_tx_split_nn_weights_4x16_layer1, - }, - { - av1_tx_split_nn_bias_4x16_layer0, - av1_tx_split_nn_bias_4x16_layer1, - }, -}; -/******************************************************************************/ - -// Tx split model for 16x32 block. -static const float av1_tx_split_nn_weights_16x32_layer0[8 * 32] = { - 0.180713f, 0.033211f, 0.607561f, 0.138642f, 0.637204f, -0.000940f, - 0.012630f, 0.358109f, 0.022238f, 0.190418f, 0.079088f, 0.065925f, - 0.038242f, 0.162380f, -0.122728f, 0.379382f, -0.303283f, -0.327550f, - 0.029120f, -0.284553f, 0.269588f, -0.309805f, -0.241036f, -0.161103f, - -0.304887f, 0.239843f, -0.149146f, 0.311234f, -0.073640f, -0.132718f, - 0.178901f, 0.474712f, 0.020280f, 0.063685f, -0.609170f, -0.013658f, - -0.338074f, 0.250429f, 0.082978f, -0.186315f, -0.788959f, 0.039859f, - -0.426461f, -0.001524f, -0.447211f, 0.378102f, 0.315617f, 0.017428f, - 0.745494f, -0.219024f, 0.512836f, 0.200522f, 0.680449f, 0.313686f, - -0.412569f, -0.132927f, 0.631120f, 0.042735f, 0.336153f, 0.044772f, - 0.432606f, 0.175681f, -0.634411f, -0.073509f, -0.040643f, -0.559260f, - -0.104034f, -0.570495f, -0.247365f, 0.063256f, -0.582021f, -0.492585f, - -0.194955f, -0.207934f, -0.506627f, 0.021743f, -0.416518f, 0.320876f, - 0.115889f, 0.149399f, -0.229376f, 0.095505f, 0.115191f, -0.471921f, - 0.113068f, 0.343684f, -0.036831f, 0.021240f, 0.295112f, 0.031166f, - 0.448201f, 
-0.132241f, 0.164032f, 0.355572f, 0.072154f, 0.017335f, - -0.046113f, 0.178719f, -0.026881f, -0.242590f, 0.055073f, -0.012958f, - 0.077904f, 0.351356f, 0.107655f, 0.260568f, -0.080052f, -0.197553f, - 0.085763f, 0.263416f, -0.327741f, 0.158855f, 0.056899f, -0.162121f, - 0.339518f, -0.571204f, 0.264966f, -0.252214f, -0.202560f, -0.134213f, - -0.330188f, 0.009470f, -0.468376f, -0.065240f, -0.307957f, 0.116479f, - -0.222238f, -0.458716f, 0.186493f, -0.391415f, 0.118649f, -0.104653f, - -0.259958f, -0.332081f, -0.403785f, -0.050147f, -0.573511f, 0.177117f, - -0.598358f, 0.164947f, -0.119694f, -0.058520f, 0.203829f, -0.267404f, - -0.048202f, -0.600006f, 0.181594f, -0.731805f, 0.146417f, -0.687148f, - -1.210525f, -0.450101f, -0.620635f, 0.208825f, -0.611357f, 0.112202f, - -0.309468f, -0.323545f, 0.357770f, 0.308061f, 0.553199f, 0.049012f, - 0.530093f, -0.208597f, 0.607882f, -0.058120f, -0.527634f, 0.018136f, - 0.060753f, 0.118894f, 0.175649f, 0.014731f, 0.428318f, -0.106465f, - -0.119077f, 0.080179f, 0.524997f, 0.368286f, 0.528286f, 0.213659f, - 0.639286f, 0.195079f, -0.049815f, -0.092008f, -0.302958f, 0.298149f, - -0.173870f, -0.145205f, -0.233589f, -0.303368f, 0.141275f, 0.325622f, - -0.115293f, 0.155188f, 0.047225f, 0.231050f, -0.167447f, 0.349754f, - 0.295544f, -0.319466f, 0.095144f, 0.174612f, -0.194652f, 0.305915f, - -0.239008f, -0.037453f, 0.280696f, 0.125850f, 0.749196f, -0.101919f, - 0.791808f, -0.236811f, 0.064157f, 0.032865f, -0.225911f, 0.350384f, - 0.723183f, -0.103992f, 0.483085f, -0.123992f, 0.602138f, 0.023895f, - -0.692601f, -0.118387f, 0.162527f, 0.145178f, -0.184702f, -0.017753f, - -0.159436f, 0.124105f, -0.131067f, 0.310275f, 0.151499f, 0.138924f, - 0.537459f, 0.263212f, 0.615896f, 0.281255f, 0.021293f, -0.473459f, - 0.210145f, -0.056682f, 0.063658f, 0.377254f, -0.314410f, -0.183487f, - 0.300384f, 0.328471f, 0.164694f, -0.159272f, -0.160942f, -0.502861f, - -0.129147f, 0.045916f, -0.606865f, -0.101378f, -}; - -static const float 
av1_tx_split_nn_bias_16x32_layer0[32] = { - 0.051664f, -0.212487f, -0.077596f, -0.818467f, 0.638475f, -0.759937f, - 0.157198f, 0.989640f, 1.586035f, 0.431144f, 0.041605f, 0.543085f, - 0.498379f, 0.320504f, 0.134233f, 0.670979f, -0.105562f, -1.574879f, - 1.261812f, -0.287530f, -1.610592f, 0.730899f, -0.894240f, -0.657790f, - 0.270806f, -0.181708f, 0.298578f, 0.817240f, -0.221508f, -0.201771f, - -0.294389f, 1.456413f, -}; - -static const float av1_tx_split_nn_weights_16x32_layer1[32] = { - 1.208914f, 0.324728f, 0.383352f, -0.874321f, 0.172565f, -0.580927f, - -0.432927f, 0.433698f, -0.801935f, 0.672028f, 0.563493f, 0.260077f, - -0.200557f, -0.121638f, 0.530735f, -0.525196f, 0.281799f, 0.624204f, - -0.662775f, -0.230887f, 0.980989f, 0.223437f, -0.790591f, 0.600724f, - -0.273445f, 0.427635f, -0.501641f, -0.878390f, 0.234731f, -0.172550f, - 0.418904f, 1.792187f, -}; - -static const float av1_tx_split_nn_bias_16x32_layer1[1] = { - -0.29233751f, -}; - -static const NN_CONFIG av1_tx_split_nnconfig_16x32 = { - 8, // num_inputs - 1, // num_outputs - 1, // num_hidden_layers - { - 32, - }, // num_hidden_nodes - { - av1_tx_split_nn_weights_16x32_layer0, - av1_tx_split_nn_weights_16x32_layer1, - }, - { - av1_tx_split_nn_bias_16x32_layer0, - av1_tx_split_nn_bias_16x32_layer1, - }, -}; -/******************************************************************************/ - -// Tx split model for 32x64 block. 
-static const float av1_tx_split_nn_weights_32x64_layer0[8 * 32] = { - 0.031614f, -0.110926f, 0.052418f, -0.702506f, 0.045708f, 0.238329f, - -0.021806f, -0.208128f, 0.509745f, -0.293891f, 0.277788f, 0.113937f, - 0.741576f, 0.062848f, 0.351878f, 0.212532f, 0.385842f, 0.081517f, - 0.398502f, -0.015156f, 0.242616f, 0.214619f, -0.182678f, -0.170546f, - 0.110605f, -0.236749f, -0.023831f, -0.285243f, 0.147156f, -0.257639f, - 0.341355f, -0.571641f, -0.721797f, 0.139588f, -0.518494f, -0.206526f, - -0.570560f, -0.184295f, 0.110271f, 0.210292f, -0.109132f, -0.001080f, - 0.129251f, -0.204230f, -0.396312f, -0.183024f, 0.421243f, -0.013154f, - 0.222627f, 0.169826f, 0.226037f, 0.218153f, -0.343528f, 0.274906f, - -0.156632f, 0.250261f, -0.484020f, 0.019909f, -0.349575f, -0.286643f, - -0.507396f, 0.202446f, -0.154110f, -0.292644f, 0.122666f, 0.306963f, - 0.424895f, 0.005579f, 0.494094f, -0.079551f, 0.473740f, 0.352414f, - -0.356917f, 0.264331f, -0.554487f, 0.119978f, 0.012291f, -0.141641f, - -0.254714f, -0.213723f, -0.116701f, -0.011267f, 0.190025f, -0.118501f, - 0.305151f, -0.316782f, -0.220801f, -0.308420f, -0.324285f, 0.421329f, - -0.177066f, -0.055114f, 0.229698f, -0.199523f, 0.054278f, 0.365020f, - -0.060586f, -0.300618f, 0.157563f, -0.064338f, -0.005711f, -0.176991f, - -0.424502f, -0.111914f, 0.092608f, 0.126621f, 0.078547f, 0.148008f, - 0.024221f, 0.124599f, 0.001343f, 0.059402f, 0.453753f, 0.047102f, - 0.242544f, 0.055735f, -0.067451f, -0.170061f, -0.170469f, -0.232173f, - 0.214908f, 0.248889f, 0.544348f, -0.084566f, 0.402478f, 0.298031f, - 0.099038f, -0.238019f, -0.475085f, -0.070042f, -0.754955f, -0.049095f, - -0.783801f, -0.099857f, -0.582008f, -0.055194f, -0.103655f, 0.143689f, - 0.100219f, 0.293934f, 0.099271f, -0.036320f, 0.356626f, -0.261445f, - 0.879544f, 0.000878f, 0.532920f, -0.093918f, 0.508867f, -0.040215f, - -0.789042f, -0.145380f, -0.090040f, -0.066636f, 0.015212f, 0.352989f, - -0.058831f, -0.164588f, 0.039890f, 0.122861f, 0.222508f, 0.061217f, - 0.466487f, 
0.022666f, 0.423777f, -0.002200f, -0.656835f, -0.099760f, - -0.520606f, 0.303204f, -0.563620f, -0.160922f, -0.243203f, 0.313354f, - -0.336516f, -0.206764f, -0.236040f, 0.325899f, -0.418748f, 0.163205f, - -0.476242f, -0.121928f, 0.139178f, -0.157193f, -0.531766f, -0.180202f, - -0.485254f, 0.187703f, -0.440072f, 0.137854f, 0.029139f, 0.109530f, - -0.078475f, -0.360618f, -0.334672f, -0.350890f, -0.403976f, 0.180336f, - -0.304542f, 0.005123f, 0.413995f, 0.314639f, 0.342648f, -0.293264f, - 0.358135f, -0.180425f, -0.369530f, -0.048413f, 0.498366f, 0.121875f, - 0.270948f, -0.187966f, 0.342503f, 0.174420f, -0.352105f, 0.088080f, - 0.008277f, 0.020275f, -0.002381f, 0.504389f, -0.018832f, -0.366047f, - -0.090947f, -0.168150f, 0.016184f, -0.328914f, 0.089579f, -0.017349f, - 0.005844f, -0.005010f, -1.857514f, -0.282426f, 0.010177f, -0.214727f, - -0.182529f, 0.156943f, -0.162032f, -0.472654f, 0.069432f, 0.016901f, - -0.767905f, 0.137129f, -0.411463f, 0.049056f, -0.431657f, -0.037641f, - 0.785500f, 0.046225f, 0.195831f, 0.245204f, 0.368614f, 0.212261f, - 0.440626f, -0.158048f, -0.461031f, -0.146280f, -}; - -static const float av1_tx_split_nn_bias_32x64_layer0[32] = { - 0.490777f, -1.894238f, 0.621333f, -0.076756f, 0.286298f, 0.286375f, - -0.126431f, -0.350034f, -1.017572f, 0.620125f, 0.408128f, 0.238756f, - -0.060728f, 0.210912f, 0.043124f, 0.445649f, 0.907025f, 0.360272f, - 1.083101f, -0.068952f, 1.062348f, 0.396354f, 0.280075f, 0.501732f, - 0.328422f, 0.066241f, 0.474697f, 0.126313f, 0.741206f, 0.314796f, - 0.552712f, 0.299410f, -}; - -static const float av1_tx_split_nn_weights_32x64_layer1[32] = { - 1.033823f, 0.603439f, 0.304591f, -0.279940f, -0.780909f, -0.132801f, - 0.154059f, 0.662014f, -0.718368f, 0.198733f, 0.039766f, -0.208516f, - -0.104909f, -0.394209f, 0.081617f, 0.365041f, -0.874960f, -0.063315f, - -1.189897f, 0.337225f, 0.410893f, 0.307519f, 0.221323f, 0.233895f, - 0.469536f, 0.438557f, 0.280144f, 0.422423f, -1.394513f, 0.781900f, - 0.352981f, 0.111265f, -}; - 
-static const float av1_tx_split_nn_bias_32x64_layer1[1] = { - -0.18160765f, -}; - -static const NN_CONFIG av1_tx_split_nnconfig_32x64 = { - 8, // num_inputs - 1, // num_outputs - 1, // num_hidden_layers - { - 32, - }, // num_hidden_nodes - { - av1_tx_split_nn_weights_32x64_layer0, - av1_tx_split_nn_weights_32x64_layer1, - }, - { - av1_tx_split_nn_bias_32x64_layer0, - av1_tx_split_nn_bias_32x64_layer1, - }, -}; -/******************************************************************************/ - -// Tx split model for 8x32 block. -static const float av1_tx_split_nn_weights_8x32_layer0[8 * 24] = { - -0.687846f, 0.121404f, -0.372905f, 0.126770f, -0.103298f, -0.101650f, - -0.148490f, -0.271740f, 0.682915f, -0.079765f, 0.634347f, -0.151503f, - 0.287692f, -0.079072f, -0.236948f, 0.065064f, 0.713383f, 0.397123f, - 0.553621f, 0.368529f, 0.767663f, -0.046601f, -0.392402f, -0.294822f, - -0.292325f, -0.010573f, -0.837945f, 0.050113f, -0.811360f, 0.199162f, - 0.150832f, 0.011602f, 0.369694f, -0.225876f, 0.234113f, -0.269808f, - 0.303805f, -0.190281f, -0.451136f, 0.209755f, -0.308894f, 0.326956f, - 0.313591f, 0.089923f, -0.095754f, 0.390981f, 0.467366f, 0.169670f, - 0.853322f, 0.054055f, 0.830319f, -0.121918f, 0.262019f, -0.093526f, - 0.385558f, 0.419174f, 0.040198f, -0.347030f, -0.450492f, -0.106764f, - 0.487502f, -0.204188f, 0.430374f, -0.116388f, 0.236407f, -0.157376f, - 0.732294f, -0.651387f, 0.347446f, 0.342575f, 0.048406f, 0.187657f, - 0.434899f, -0.447782f, 0.032728f, -0.071168f, -0.255327f, 0.104174f, - 0.095689f, -0.431743f, 0.725694f, 0.031797f, 0.523171f, 0.061801f, - 0.469804f, -0.071068f, -0.059024f, -0.211937f, 0.392134f, -0.321490f, - 0.366060f, -0.427798f, 0.166771f, 0.299652f, 0.044660f, 0.205142f, - 0.039133f, -0.051835f, -0.465475f, 0.216976f, -0.341156f, 0.095358f, - 0.230807f, 0.201674f, 0.279266f, -0.713534f, -0.091690f, -0.569708f, - -0.119001f, 0.252160f, -1.544578f, -0.284477f, 0.555348f, 0.226471f, - 0.347690f, 0.034365f, 0.770835f, -0.241859f, 
-0.130241f, 0.292936f, - 0.396622f, -0.417916f, 0.492224f, 0.125517f, 0.344824f, 0.232172f, - -0.432106f, -0.278745f, 0.035069f, -0.307247f, -0.120760f, 0.170950f, - 0.433601f, 0.044286f, 0.141463f, -0.041382f, 0.529346f, 0.010868f, - -0.323674f, 0.185205f, 0.623459f, 0.232842f, -0.406693f, -0.142944f, - 0.222988f, 0.343634f, 0.065401f, 0.002621f, 0.805335f, -0.426926f, - 0.279181f, 0.131364f, 0.192339f, -0.402391f, 0.544120f, -0.060618f, - 0.467780f, 0.165224f, -0.373131f, 0.002427f, 0.688064f, 0.322317f, - 0.259713f, 0.130583f, 0.185032f, -0.189111f, -0.067821f, 0.010875f, - 0.644724f, -0.179291f, 0.463222f, 0.155230f, 0.721384f, -0.046019f, - 0.438501f, 0.440027f, -0.462090f, -0.002039f, -0.468026f, -0.008890f, - -0.328530f, 0.370102f, 0.482531f, 0.043471f, -0.469732f, -0.532663f, - 0.122081f, -0.379659f, 0.037219f, -0.519913f, -0.128975f, -0.404365f, -}; - -static const float av1_tx_split_nn_bias_8x32_layer0[24] = { - -1.198965f, 0.395204f, -0.408627f, -0.021654f, -0.658355f, 0.154525f, - -0.288354f, 1.207574f, 0.411608f, 0.964678f, -1.176893f, 1.059006f, - -0.472969f, 2.087975f, 1.065536f, 0.595569f, 0.197907f, -0.349938f, - 1.013651f, -0.931093f, -0.973595f, -0.459094f, -1.253062f, 1.624782f, -}; - -static const float av1_tx_split_nn_weights_8x32_layer1[24] = { - 0.815787f, -0.393465f, -0.483427f, -0.565592f, 0.493494f, 0.430229f, - -0.507073f, -0.251379f, -0.353418f, -0.495445f, 0.820029f, 0.649146f, - -0.487383f, 1.844503f, 0.480324f, -0.982705f, -0.501446f, -0.220584f, - 0.334299f, 0.802238f, 0.805838f, -0.487848f, 0.300772f, -1.232857f, -}; - -static const float av1_tx_split_nn_bias_8x32_layer1[1] = { - 0.13435879f, -}; - -static const NN_CONFIG av1_tx_split_nnconfig_8x32 = { - 8, // num_inputs - 1, // num_outputs - 1, // num_hidden_layers - { - 24, - }, // num_hidden_nodes - { - av1_tx_split_nn_weights_8x32_layer0, - av1_tx_split_nn_weights_8x32_layer1, - }, - { - av1_tx_split_nn_bias_8x32_layer0, - av1_tx_split_nn_bias_8x32_layer1, - }, -}; 
-/******************************************************************************/ - -// Tx split model for 16x32 block. -static const float av1_tx_split_nn_weights_16x64_layer0[8 * 16] = { - -0.378223f, -0.124216f, -0.514089f, -0.110117f, -0.585801f, -0.094838f, - -0.455385f, -0.220254f, -0.504568f, -0.082351f, -0.476420f, -0.253993f, - -0.454709f, -0.059461f, 0.210313f, -0.155683f, 0.192968f, -0.127804f, - 0.471996f, 0.253377f, 0.472625f, 0.485322f, 0.150560f, 0.164868f, - -0.475587f, 0.447559f, -0.455759f, -0.306665f, -0.194866f, -0.283716f, - -0.243897f, 0.293020f, -0.308298f, -0.191904f, -0.468568f, 0.014053f, - -0.618848f, 0.096273f, -0.444586f, 0.347750f, -0.280643f, -0.062872f, - 0.118661f, 0.540099f, 0.104141f, -0.279300f, -0.098721f, -0.173427f, - -0.984558f, -0.424559f, -0.411928f, -0.120875f, -0.488999f, -0.050716f, - -0.523103f, 0.093620f, -0.930396f, -0.431997f, -1.163297f, 0.190384f, - -0.422581f, -0.005354f, 0.450552f, 0.369210f, 0.562484f, 0.679922f, - 0.282099f, -0.039075f, 0.404196f, 0.006371f, 0.069679f, -0.196160f, - -0.213675f, 0.275187f, -0.104235f, -0.193090f, 0.003116f, -0.252454f, - -0.094591f, 0.210439f, -0.137070f, 0.145043f, 0.024558f, 0.121718f, - 0.010138f, 0.301651f, -0.377990f, 0.444414f, 0.001845f, -0.095334f, - 0.550259f, 0.087603f, 0.792492f, -0.044584f, 0.641706f, -0.328458f, - -0.447791f, 0.135376f, 0.356385f, 0.135748f, 0.310370f, 0.293757f, - -0.062000f, -0.056368f, 0.343930f, 0.312039f, 0.370763f, 0.452381f, - -0.023630f, -0.185909f, 0.422277f, -0.006306f, 0.045166f, 0.423359f, - -0.157735f, -0.084901f, 0.219527f, -0.209510f, 0.575057f, 0.249276f, - 0.069267f, 0.233898f, -0.229392f, 0.117197f, -0.038551f, 0.293976f, - 0.101996f, 0.120878f, -}; - -static const float av1_tx_split_nn_bias_16x64_layer0[16] = { - 1.036995f, 0.160249f, 0.100264f, 0.694881f, 0.694677f, 0.128379f, - -0.843405f, -0.405515f, 0.104139f, 0.182980f, -0.025472f, 0.901067f, - -0.299866f, -0.103079f, -0.190352f, -0.048121f, -}; - -static const float 
av1_tx_split_nn_weights_16x64_layer1[16] = { - -1.778868f, 0.174690f, 0.211991f, 0.712138f, 0.589352f, 0.466652f, - 1.029146f, -0.490044f, 0.483015f, 0.600215f, -0.577776f, -0.755546f, - 0.348337f, -0.205082f, 0.347129f, -0.322277f, -}; - -static const float av1_tx_split_nn_bias_16x64_layer1[1] = { - 0.04230947f, -}; - -static const NN_CONFIG av1_tx_split_nnconfig_16x64 = { - 8, // num_inputs - 1, // num_outputs - 1, // num_hidden_layers - { - 16, - }, // num_hidden_nodes - { - av1_tx_split_nn_weights_16x64_layer0, - av1_tx_split_nn_weights_16x64_layer1, - }, - { - av1_tx_split_nn_bias_16x64_layer0, - av1_tx_split_nn_bias_16x64_layer1, - }, -}; -/******************************************************************************/ - -// Map block size to its corresponding neural net model for tx split prediction. -static const NN_CONFIG *av1_tx_split_nnconfig_map[TX_SIZES_ALL] = { - NULL, // TX_4X4, - &av1_tx_split_nnconfig_8x8, // TX_8X8, - &av1_tx_split_nnconfig_16x16, // TX_16X16, - &av1_tx_split_nnconfig_32x32, // TX_32X32, - &av1_tx_split_nnconfig_64x64, // TX_64X64, - &av1_tx_split_nnconfig_4x8, // TX_4X8, - &av1_tx_split_nnconfig_4x8, // TX_8X4, - &av1_tx_split_nnconfig_8x16, // TX_8X16, - &av1_tx_split_nnconfig_8x16, // TX_16X8, - &av1_tx_split_nnconfig_16x32, // TX_16X32, - &av1_tx_split_nnconfig_16x32, // TX_32X16, - &av1_tx_split_nnconfig_32x64, // TX_32X64, - &av1_tx_split_nnconfig_32x64, // TX_64X32, - &av1_tx_split_nnconfig_4x16, // TX_4X16, - &av1_tx_split_nnconfig_4x16, // TX_16X4, - &av1_tx_split_nnconfig_8x32, // TX_8X32, - &av1_tx_split_nnconfig_8x32, // TX_32X8, - &av1_tx_split_nnconfig_16x64, // TX_16X64, - &av1_tx_split_nnconfig_16x64, // TX_64X16, -}; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif // AOM_AV1_ENCODER_TX_PRUNE_MODEL_WEIGHTS_H_ diff --git a/third_party/aom/av1/encoder/wedge_utils.c b/third_party/aom/av1/encoder/wedge_utils.c deleted file mode 100644 index e6edbb6af..000000000 --- a/third_party/aom/av1/encoder/wedge_utils.c 
+++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include - -#include "aom/aom_integer.h" - -#include "aom_ports/mem.h" - -#include "aom_dsp/aom_dsp_common.h" - -#include "av1/common/reconinter.h" - -#define MAX_MASK_VALUE (1 << WEDGE_WEIGHT_BITS) - -/** - * Computes SSE of a compound predictor constructed from 2 fundamental - * predictors p0 and p1 using blending with mask. - * - * r1: Residuals of p1. - * (source - p1) - * d: Difference of p1 and p0. - * (p1 - p0) - * m: The blending mask - * N: Number of pixels - * - * 'r1', 'd', and 'm' are contiguous. - * - * Computes: - * Sum((MAX_MASK_VALUE*r1 + mask*d)**2), which is equivalent to: - * Sum((mask*r0 + (MAX_MASK_VALUE-mask)*r1)**2), - * where r0 is (source - p0), and r1 is (source - p1), which is in turn - * is equivalent to: - * Sum((source*MAX_MASK_VALUE - (mask*p0 + (MAX_MASK_VALUE-mask)*p1))**2), - * which is the SSE of the residuals of the compound predictor scaled up by - * MAX_MASK_VALUE**2. - * - * Note that we clamp the partial term in the loop to 16 bits signed. This is - * to facilitate equivalent SIMD implementation. It should have no effect if - * residuals are within 16 - WEDGE_WEIGHT_BITS (=10) signed, which always - * holds for 8 bit input, and on real input, it should hold practically always, - * as residuals are expected to be small. 
- */ -uint64_t av1_wedge_sse_from_residuals_c(const int16_t *r1, const int16_t *d, - const uint8_t *m, int N) { - uint64_t csse = 0; - int i; - - for (i = 0; i < N; i++) { - int32_t t = MAX_MASK_VALUE * r1[i] + m[i] * d[i]; - t = clamp(t, INT16_MIN, INT16_MAX); - csse += t * t; - } - return ROUND_POWER_OF_TWO(csse, 2 * WEDGE_WEIGHT_BITS); -} - -/** - * Choose the mask sign for a compound predictor. - * - * ds: Difference of the squares of the residuals. - * r0**2 - r1**2 - * m: The blending mask - * N: Number of pixels - * limit: Pre-computed threshold value. - * MAX_MASK_VALUE/2 * (sum(r0**2) - sum(r1**2)) - * - * 'ds' and 'm' are contiguous. - * - * Returns true if the negated mask has lower SSE compared to the positive - * mask. Computation is based on: - * Sum((mask*r0 + (MAX_MASK_VALUE-mask)*r1)**2) - * > - * Sum(((MAX_MASK_VALUE-mask)*r0 + mask*r1)**2) - * - * which can be simplified to: - * - * Sum(mask*(r0**2 - r1**2)) > MAX_MASK_VALUE/2 * (sum(r0**2) - sum(r1**2)) - * - * The right hand side does not depend on the mask, and needs to be passed as - * the 'limit' parameter. - * - * After pre-computing (r0**2 - r1**2), which is passed in as 'ds', the left - * hand side is simply a scalar product between an int16_t and uint8_t vector. - * - * Note that for efficiency, ds is stored on 16 bits. Real input residuals - * being small, this should not cause a noticeable issue. - */ -int av1_wedge_sign_from_residuals_c(const int16_t *ds, const uint8_t *m, int N, - int64_t limit) { - int64_t acc = 0; - - do { - acc += *ds++ * *m++; - } while (--N); - - return acc > limit; -} - -/** - * Compute the element-wise difference of the squares of 2 arrays. - * - * d: Difference of the squares of the inputs: a**2 - b**2 - * a: First input array - * b: Second input array - * N: Number of elements - * - * 'd', 'a', and 'b' are contiguous. - * - * The result is saturated to signed 16 bits. 
- */ -void av1_wedge_compute_delta_squares_c(int16_t *d, const int16_t *a, - const int16_t *b, int N) { - int i; - - for (i = 0; i < N; i++) - d[i] = clamp(a[i] * a[i] - b[i] * b[i], INT16_MIN, INT16_MAX); -} diff --git a/third_party/aom/av1/encoder/x86/av1_fwd_txfm1d_sse4.c b/third_party/aom/av1/encoder/x86/av1_fwd_txfm1d_sse4.c deleted file mode 100644 index 07615543c..000000000 --- a/third_party/aom/av1/encoder/x86/av1_fwd_txfm1d_sse4.c +++ /dev/null @@ -1,1217 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include "av1/encoder/x86/av1_txfm1d_sse4.h" - -void av1_fdct32_new_sse4_1(const __m128i *input, __m128i *output, - int8_t cos_bit) { - __m128i buf0[32]; - __m128i buf1[32]; - const int32_t *cospi; - // stage 0 - // stage 1 - buf1[0] = _mm_add_epi32(input[0], input[31]); - buf1[31] = _mm_sub_epi32(input[0], input[31]); - buf1[1] = _mm_add_epi32(input[1], input[30]); - buf1[30] = _mm_sub_epi32(input[1], input[30]); - buf1[2] = _mm_add_epi32(input[2], input[29]); - buf1[29] = _mm_sub_epi32(input[2], input[29]); - buf1[3] = _mm_add_epi32(input[3], input[28]); - buf1[28] = _mm_sub_epi32(input[3], input[28]); - buf1[4] = _mm_add_epi32(input[4], input[27]); - buf1[27] = _mm_sub_epi32(input[4], input[27]); - buf1[5] = _mm_add_epi32(input[5], input[26]); - buf1[26] = _mm_sub_epi32(input[5], input[26]); - buf1[6] = _mm_add_epi32(input[6], input[25]); - buf1[25] = _mm_sub_epi32(input[6], input[25]); - buf1[7] = _mm_add_epi32(input[7], input[24]); - buf1[24] = _mm_sub_epi32(input[7], input[24]); - buf1[8] = _mm_add_epi32(input[8], input[23]); - buf1[23] = _mm_sub_epi32(input[8], input[23]); - buf1[9] = _mm_add_epi32(input[9], input[22]); - buf1[22] = _mm_sub_epi32(input[9], input[22]); - buf1[10] = _mm_add_epi32(input[10], input[21]); - buf1[21] = _mm_sub_epi32(input[10], input[21]); - buf1[11] = _mm_add_epi32(input[11], input[20]); - buf1[20] = _mm_sub_epi32(input[11], input[20]); - buf1[12] = _mm_add_epi32(input[12], input[19]); - buf1[19] = _mm_sub_epi32(input[12], input[19]); - buf1[13] = _mm_add_epi32(input[13], input[18]); - buf1[18] = _mm_sub_epi32(input[13], input[18]); - buf1[14] = _mm_add_epi32(input[14], input[17]); - buf1[17] = _mm_sub_epi32(input[14], input[17]); - buf1[15] = _mm_add_epi32(input[15], input[16]); - buf1[16] = _mm_sub_epi32(input[15], input[16]); - - // stage 2 - cospi = cospi_arr(cos_bit); - buf0[0] = _mm_add_epi32(buf1[0], buf1[15]); - buf0[15] = _mm_sub_epi32(buf1[0], buf1[15]); - buf0[1] = _mm_add_epi32(buf1[1], buf1[14]); - buf0[14] = 
_mm_sub_epi32(buf1[1], buf1[14]); - buf0[2] = _mm_add_epi32(buf1[2], buf1[13]); - buf0[13] = _mm_sub_epi32(buf1[2], buf1[13]); - buf0[3] = _mm_add_epi32(buf1[3], buf1[12]); - buf0[12] = _mm_sub_epi32(buf1[3], buf1[12]); - buf0[4] = _mm_add_epi32(buf1[4], buf1[11]); - buf0[11] = _mm_sub_epi32(buf1[4], buf1[11]); - buf0[5] = _mm_add_epi32(buf1[5], buf1[10]); - buf0[10] = _mm_sub_epi32(buf1[5], buf1[10]); - buf0[6] = _mm_add_epi32(buf1[6], buf1[9]); - buf0[9] = _mm_sub_epi32(buf1[6], buf1[9]); - buf0[7] = _mm_add_epi32(buf1[7], buf1[8]); - buf0[8] = _mm_sub_epi32(buf1[7], buf1[8]); - buf0[16] = buf1[16]; - buf0[17] = buf1[17]; - buf0[18] = buf1[18]; - buf0[19] = buf1[19]; - btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[20], buf1[27], buf0[20], - buf0[27], cos_bit); - btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[21], buf1[26], buf0[21], - buf0[26], cos_bit); - btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[22], buf1[25], buf0[22], - buf0[25], cos_bit); - btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[23], buf1[24], buf0[23], - buf0[24], cos_bit); - buf0[28] = buf1[28]; - buf0[29] = buf1[29]; - buf0[30] = buf1[30]; - buf0[31] = buf1[31]; - - // stage 3 - cospi = cospi_arr(cos_bit); - buf1[0] = _mm_add_epi32(buf0[0], buf0[7]); - buf1[7] = _mm_sub_epi32(buf0[0], buf0[7]); - buf1[1] = _mm_add_epi32(buf0[1], buf0[6]); - buf1[6] = _mm_sub_epi32(buf0[1], buf0[6]); - buf1[2] = _mm_add_epi32(buf0[2], buf0[5]); - buf1[5] = _mm_sub_epi32(buf0[2], buf0[5]); - buf1[3] = _mm_add_epi32(buf0[3], buf0[4]); - buf1[4] = _mm_sub_epi32(buf0[3], buf0[4]); - buf1[8] = buf0[8]; - buf1[9] = buf0[9]; - btf_32_sse4_1_type0(-cospi[32], cospi[32], buf0[10], buf0[13], buf1[10], - buf1[13], cos_bit); - btf_32_sse4_1_type0(-cospi[32], cospi[32], buf0[11], buf0[12], buf1[11], - buf1[12], cos_bit); - buf1[14] = buf0[14]; - buf1[15] = buf0[15]; - buf1[16] = _mm_add_epi32(buf0[16], buf0[23]); - buf1[23] = _mm_sub_epi32(buf0[16], buf0[23]); - buf1[17] = _mm_add_epi32(buf0[17], buf0[22]); - buf1[22] 
= _mm_sub_epi32(buf0[17], buf0[22]); - buf1[18] = _mm_add_epi32(buf0[18], buf0[21]); - buf1[21] = _mm_sub_epi32(buf0[18], buf0[21]); - buf1[19] = _mm_add_epi32(buf0[19], buf0[20]); - buf1[20] = _mm_sub_epi32(buf0[19], buf0[20]); - buf1[24] = _mm_sub_epi32(buf0[31], buf0[24]); - buf1[31] = _mm_add_epi32(buf0[31], buf0[24]); - buf1[25] = _mm_sub_epi32(buf0[30], buf0[25]); - buf1[30] = _mm_add_epi32(buf0[30], buf0[25]); - buf1[26] = _mm_sub_epi32(buf0[29], buf0[26]); - buf1[29] = _mm_add_epi32(buf0[29], buf0[26]); - buf1[27] = _mm_sub_epi32(buf0[28], buf0[27]); - buf1[28] = _mm_add_epi32(buf0[28], buf0[27]); - - // stage 4 - cospi = cospi_arr(cos_bit); - buf0[0] = _mm_add_epi32(buf1[0], buf1[3]); - buf0[3] = _mm_sub_epi32(buf1[0], buf1[3]); - buf0[1] = _mm_add_epi32(buf1[1], buf1[2]); - buf0[2] = _mm_sub_epi32(buf1[1], buf1[2]); - buf0[4] = buf1[4]; - btf_32_sse4_1_type0(-cospi[32], cospi[32], buf1[5], buf1[6], buf0[5], buf0[6], - cos_bit); - buf0[7] = buf1[7]; - buf0[8] = _mm_add_epi32(buf1[8], buf1[11]); - buf0[11] = _mm_sub_epi32(buf1[8], buf1[11]); - buf0[9] = _mm_add_epi32(buf1[9], buf1[10]); - buf0[10] = _mm_sub_epi32(buf1[9], buf1[10]); - buf0[12] = _mm_sub_epi32(buf1[15], buf1[12]); - buf0[15] = _mm_add_epi32(buf1[15], buf1[12]); - buf0[13] = _mm_sub_epi32(buf1[14], buf1[13]); - buf0[14] = _mm_add_epi32(buf1[14], buf1[13]); - buf0[16] = buf1[16]; - buf0[17] = buf1[17]; - btf_32_sse4_1_type0(-cospi[16], cospi[48], buf1[18], buf1[29], buf0[18], - buf0[29], cos_bit); - btf_32_sse4_1_type0(-cospi[16], cospi[48], buf1[19], buf1[28], buf0[19], - buf0[28], cos_bit); - btf_32_sse4_1_type0(-cospi[48], -cospi[16], buf1[20], buf1[27], buf0[20], - buf0[27], cos_bit); - btf_32_sse4_1_type0(-cospi[48], -cospi[16], buf1[21], buf1[26], buf0[21], - buf0[26], cos_bit); - buf0[22] = buf1[22]; - buf0[23] = buf1[23]; - buf0[24] = buf1[24]; - buf0[25] = buf1[25]; - buf0[30] = buf1[30]; - buf0[31] = buf1[31]; - - // stage 5 - cospi = cospi_arr(cos_bit); - 
btf_32_sse4_1_type0(cospi[32], cospi[32], buf0[0], buf0[1], buf1[0], buf1[1], - cos_bit); - btf_32_sse4_1_type1(cospi[48], cospi[16], buf0[2], buf0[3], buf1[2], buf1[3], - cos_bit); - buf1[4] = _mm_add_epi32(buf0[4], buf0[5]); - buf1[5] = _mm_sub_epi32(buf0[4], buf0[5]); - buf1[6] = _mm_sub_epi32(buf0[7], buf0[6]); - buf1[7] = _mm_add_epi32(buf0[7], buf0[6]); - buf1[8] = buf0[8]; - btf_32_sse4_1_type0(-cospi[16], cospi[48], buf0[9], buf0[14], buf1[9], - buf1[14], cos_bit); - btf_32_sse4_1_type0(-cospi[48], -cospi[16], buf0[10], buf0[13], buf1[10], - buf1[13], cos_bit); - buf1[11] = buf0[11]; - buf1[12] = buf0[12]; - buf1[15] = buf0[15]; - buf1[16] = _mm_add_epi32(buf0[16], buf0[19]); - buf1[19] = _mm_sub_epi32(buf0[16], buf0[19]); - buf1[17] = _mm_add_epi32(buf0[17], buf0[18]); - buf1[18] = _mm_sub_epi32(buf0[17], buf0[18]); - buf1[20] = _mm_sub_epi32(buf0[23], buf0[20]); - buf1[23] = _mm_add_epi32(buf0[23], buf0[20]); - buf1[21] = _mm_sub_epi32(buf0[22], buf0[21]); - buf1[22] = _mm_add_epi32(buf0[22], buf0[21]); - buf1[24] = _mm_add_epi32(buf0[24], buf0[27]); - buf1[27] = _mm_sub_epi32(buf0[24], buf0[27]); - buf1[25] = _mm_add_epi32(buf0[25], buf0[26]); - buf1[26] = _mm_sub_epi32(buf0[25], buf0[26]); - buf1[28] = _mm_sub_epi32(buf0[31], buf0[28]); - buf1[31] = _mm_add_epi32(buf0[31], buf0[28]); - buf1[29] = _mm_sub_epi32(buf0[30], buf0[29]); - buf1[30] = _mm_add_epi32(buf0[30], buf0[29]); - - // stage 6 - cospi = cospi_arr(cos_bit); - buf0[0] = buf1[0]; - buf0[1] = buf1[1]; - buf0[2] = buf1[2]; - buf0[3] = buf1[3]; - btf_32_sse4_1_type1(cospi[56], cospi[8], buf1[4], buf1[7], buf0[4], buf0[7], - cos_bit); - btf_32_sse4_1_type1(cospi[24], cospi[40], buf1[5], buf1[6], buf0[5], buf0[6], - cos_bit); - buf0[8] = _mm_add_epi32(buf1[8], buf1[9]); - buf0[9] = _mm_sub_epi32(buf1[8], buf1[9]); - buf0[10] = _mm_sub_epi32(buf1[11], buf1[10]); - buf0[11] = _mm_add_epi32(buf1[11], buf1[10]); - buf0[12] = _mm_add_epi32(buf1[12], buf1[13]); - buf0[13] = _mm_sub_epi32(buf1[12], 
buf1[13]); - buf0[14] = _mm_sub_epi32(buf1[15], buf1[14]); - buf0[15] = _mm_add_epi32(buf1[15], buf1[14]); - buf0[16] = buf1[16]; - btf_32_sse4_1_type0(-cospi[8], cospi[56], buf1[17], buf1[30], buf0[17], - buf0[30], cos_bit); - btf_32_sse4_1_type0(-cospi[56], -cospi[8], buf1[18], buf1[29], buf0[18], - buf0[29], cos_bit); - buf0[19] = buf1[19]; - buf0[20] = buf1[20]; - btf_32_sse4_1_type0(-cospi[40], cospi[24], buf1[21], buf1[26], buf0[21], - buf0[26], cos_bit); - btf_32_sse4_1_type0(-cospi[24], -cospi[40], buf1[22], buf1[25], buf0[22], - buf0[25], cos_bit); - buf0[23] = buf1[23]; - buf0[24] = buf1[24]; - buf0[27] = buf1[27]; - buf0[28] = buf1[28]; - buf0[31] = buf1[31]; - - // stage 7 - cospi = cospi_arr(cos_bit); - buf1[0] = buf0[0]; - buf1[1] = buf0[1]; - buf1[2] = buf0[2]; - buf1[3] = buf0[3]; - buf1[4] = buf0[4]; - buf1[5] = buf0[5]; - buf1[6] = buf0[6]; - buf1[7] = buf0[7]; - btf_32_sse4_1_type1(cospi[60], cospi[4], buf0[8], buf0[15], buf1[8], buf1[15], - cos_bit); - btf_32_sse4_1_type1(cospi[28], cospi[36], buf0[9], buf0[14], buf1[9], - buf1[14], cos_bit); - btf_32_sse4_1_type1(cospi[44], cospi[20], buf0[10], buf0[13], buf1[10], - buf1[13], cos_bit); - btf_32_sse4_1_type1(cospi[12], cospi[52], buf0[11], buf0[12], buf1[11], - buf1[12], cos_bit); - buf1[16] = _mm_add_epi32(buf0[16], buf0[17]); - buf1[17] = _mm_sub_epi32(buf0[16], buf0[17]); - buf1[18] = _mm_sub_epi32(buf0[19], buf0[18]); - buf1[19] = _mm_add_epi32(buf0[19], buf0[18]); - buf1[20] = _mm_add_epi32(buf0[20], buf0[21]); - buf1[21] = _mm_sub_epi32(buf0[20], buf0[21]); - buf1[22] = _mm_sub_epi32(buf0[23], buf0[22]); - buf1[23] = _mm_add_epi32(buf0[23], buf0[22]); - buf1[24] = _mm_add_epi32(buf0[24], buf0[25]); - buf1[25] = _mm_sub_epi32(buf0[24], buf0[25]); - buf1[26] = _mm_sub_epi32(buf0[27], buf0[26]); - buf1[27] = _mm_add_epi32(buf0[27], buf0[26]); - buf1[28] = _mm_add_epi32(buf0[28], buf0[29]); - buf1[29] = _mm_sub_epi32(buf0[28], buf0[29]); - buf1[30] = _mm_sub_epi32(buf0[31], buf0[30]); - 
buf1[31] = _mm_add_epi32(buf0[31], buf0[30]); - - // stage 8 - cospi = cospi_arr(cos_bit); - buf0[0] = buf1[0]; - buf0[1] = buf1[1]; - buf0[2] = buf1[2]; - buf0[3] = buf1[3]; - buf0[4] = buf1[4]; - buf0[5] = buf1[5]; - buf0[6] = buf1[6]; - buf0[7] = buf1[7]; - buf0[8] = buf1[8]; - buf0[9] = buf1[9]; - buf0[10] = buf1[10]; - buf0[11] = buf1[11]; - buf0[12] = buf1[12]; - buf0[13] = buf1[13]; - buf0[14] = buf1[14]; - buf0[15] = buf1[15]; - btf_32_sse4_1_type1(cospi[62], cospi[2], buf1[16], buf1[31], buf0[16], - buf0[31], cos_bit); - btf_32_sse4_1_type1(cospi[30], cospi[34], buf1[17], buf1[30], buf0[17], - buf0[30], cos_bit); - btf_32_sse4_1_type1(cospi[46], cospi[18], buf1[18], buf1[29], buf0[18], - buf0[29], cos_bit); - btf_32_sse4_1_type1(cospi[14], cospi[50], buf1[19], buf1[28], buf0[19], - buf0[28], cos_bit); - btf_32_sse4_1_type1(cospi[54], cospi[10], buf1[20], buf1[27], buf0[20], - buf0[27], cos_bit); - btf_32_sse4_1_type1(cospi[22], cospi[42], buf1[21], buf1[26], buf0[21], - buf0[26], cos_bit); - btf_32_sse4_1_type1(cospi[38], cospi[26], buf1[22], buf1[25], buf0[22], - buf0[25], cos_bit); - btf_32_sse4_1_type1(cospi[6], cospi[58], buf1[23], buf1[24], buf0[23], - buf0[24], cos_bit); - - // stage 9 - output[0] = buf0[0]; - output[1] = buf0[16]; - output[2] = buf0[8]; - output[3] = buf0[24]; - output[4] = buf0[4]; - output[5] = buf0[20]; - output[6] = buf0[12]; - output[7] = buf0[28]; - output[8] = buf0[2]; - output[9] = buf0[18]; - output[10] = buf0[10]; - output[11] = buf0[26]; - output[12] = buf0[6]; - output[13] = buf0[22]; - output[14] = buf0[14]; - output[15] = buf0[30]; - output[16] = buf0[1]; - output[17] = buf0[17]; - output[18] = buf0[9]; - output[19] = buf0[25]; - output[20] = buf0[5]; - output[21] = buf0[21]; - output[22] = buf0[13]; - output[23] = buf0[29]; - output[24] = buf0[3]; - output[25] = buf0[19]; - output[26] = buf0[11]; - output[27] = buf0[27]; - output[28] = buf0[7]; - output[29] = buf0[23]; - output[30] = buf0[15]; - output[31] = buf0[31]; 
-} - -void av1_fadst4_new_sse4_1(const __m128i *input, __m128i *output, - const int8_t cos_bit, const int8_t *stage_range) { - const int txfm_size = 4; - const int num_per_128 = 4; - const int32_t *cospi; - __m128i buf0[4]; - __m128i buf1[4]; - int col_num = txfm_size / num_per_128; - int col; - (void)stage_range; - for (col = 0; col < col_num; col++) { - // stage 0; - int32_t stage_idx = 0; - int j; - for (j = 0; j < 4; ++j) { - buf0[j] = input[j * col_num + col]; - } - - // stage 1 - stage_idx++; - buf1[0] = buf0[3]; - buf1[1] = buf0[0]; - buf1[2] = buf0[1]; - buf1[3] = buf0[2]; - - // stage 2 - stage_idx++; - - cospi = cospi_arr(cos_bit); - btf_32_sse4_1_type0(cospi[8], cospi[56], buf1[0], buf1[1], buf0[0], buf0[1], - cos_bit); - btf_32_sse4_1_type0(cospi[40], cospi[24], buf1[2], buf1[3], buf0[2], - buf0[3], cos_bit); - - // stage 3 - stage_idx++; - buf1[0] = _mm_add_epi32(buf0[0], buf0[2]); - buf1[2] = _mm_sub_epi32(buf0[0], buf0[2]); - buf1[1] = _mm_add_epi32(buf0[1], buf0[3]); - buf1[3] = _mm_sub_epi32(buf0[1], buf0[3]); - - // stage 4 - stage_idx++; - - cospi = cospi_arr(cos_bit); - buf0[0] = buf1[0]; - buf0[1] = buf1[1]; - btf_32_sse4_1_type0(cospi[32], cospi[32], buf1[2], buf1[3], buf0[2], - buf0[3], cos_bit); - - // stage 5 - stage_idx++; - buf1[0] = buf0[0]; - buf1[1] = _mm_sub_epi32(_mm_setzero_si128(), buf0[2]); - buf1[2] = buf0[3]; - buf1[3] = _mm_sub_epi32(_mm_setzero_si128(), buf0[1]); - - for (j = 0; j < 4; ++j) { - output[j * col_num + col] = buf1[j]; - } - } -} - -void av1_fdct64_new_sse4_1(const __m128i *input, __m128i *output, - int8_t cos_bit, const int instride, - const int outstride) { - const int32_t *cospi = cospi_arr(cos_bit); - const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1)); - - __m128i cospi_m32 = _mm_set1_epi32(-cospi[32]); - __m128i cospi_p32 = _mm_set1_epi32(cospi[32]); - __m128i cospi_m16 = _mm_set1_epi32(-cospi[16]); - __m128i cospi_p48 = _mm_set1_epi32(cospi[48]); - __m128i cospi_m48 = _mm_set1_epi32(-cospi[48]); - 
__m128i cospi_p16 = _mm_set1_epi32(cospi[16]); - __m128i cospi_m08 = _mm_set1_epi32(-cospi[8]); - __m128i cospi_p56 = _mm_set1_epi32(cospi[56]); - __m128i cospi_m56 = _mm_set1_epi32(-cospi[56]); - __m128i cospi_m40 = _mm_set1_epi32(-cospi[40]); - __m128i cospi_p24 = _mm_set1_epi32(cospi[24]); - __m128i cospi_m24 = _mm_set1_epi32(-cospi[24]); - __m128i cospi_p08 = _mm_set1_epi32(cospi[8]); - __m128i cospi_p40 = _mm_set1_epi32(cospi[40]); - __m128i cospi_p60 = _mm_set1_epi32(cospi[60]); - __m128i cospi_p04 = _mm_set1_epi32(cospi[4]); - __m128i cospi_p28 = _mm_set1_epi32(cospi[28]); - __m128i cospi_p36 = _mm_set1_epi32(cospi[36]); - __m128i cospi_p44 = _mm_set1_epi32(cospi[44]); - __m128i cospi_p20 = _mm_set1_epi32(cospi[20]); - __m128i cospi_p12 = _mm_set1_epi32(cospi[12]); - __m128i cospi_p52 = _mm_set1_epi32(cospi[52]); - __m128i cospi_m04 = _mm_set1_epi32(-cospi[4]); - __m128i cospi_m60 = _mm_set1_epi32(-cospi[60]); - __m128i cospi_m36 = _mm_set1_epi32(-cospi[36]); - __m128i cospi_m28 = _mm_set1_epi32(-cospi[28]); - __m128i cospi_m20 = _mm_set1_epi32(-cospi[20]); - __m128i cospi_m44 = _mm_set1_epi32(-cospi[44]); - __m128i cospi_m52 = _mm_set1_epi32(-cospi[52]); - __m128i cospi_m12 = _mm_set1_epi32(-cospi[12]); - __m128i cospi_p62 = _mm_set1_epi32(cospi[62]); - __m128i cospi_p02 = _mm_set1_epi32(cospi[2]); - __m128i cospi_p30 = _mm_set1_epi32(cospi[30]); - __m128i cospi_p34 = _mm_set1_epi32(cospi[34]); - __m128i cospi_p46 = _mm_set1_epi32(cospi[46]); - __m128i cospi_p18 = _mm_set1_epi32(cospi[18]); - __m128i cospi_p14 = _mm_set1_epi32(cospi[14]); - __m128i cospi_p50 = _mm_set1_epi32(cospi[50]); - __m128i cospi_p54 = _mm_set1_epi32(cospi[54]); - __m128i cospi_p10 = _mm_set1_epi32(cospi[10]); - __m128i cospi_p22 = _mm_set1_epi32(cospi[22]); - __m128i cospi_p42 = _mm_set1_epi32(cospi[42]); - __m128i cospi_p38 = _mm_set1_epi32(cospi[38]); - __m128i cospi_p26 = _mm_set1_epi32(cospi[26]); - __m128i cospi_p06 = _mm_set1_epi32(cospi[6]); - __m128i cospi_p58 = 
_mm_set1_epi32(cospi[58]); - __m128i cospi_p63 = _mm_set1_epi32(cospi[63]); - __m128i cospi_p01 = _mm_set1_epi32(cospi[1]); - __m128i cospi_p31 = _mm_set1_epi32(cospi[31]); - __m128i cospi_p33 = _mm_set1_epi32(cospi[33]); - __m128i cospi_p47 = _mm_set1_epi32(cospi[47]); - __m128i cospi_p17 = _mm_set1_epi32(cospi[17]); - __m128i cospi_p15 = _mm_set1_epi32(cospi[15]); - __m128i cospi_p49 = _mm_set1_epi32(cospi[49]); - __m128i cospi_p55 = _mm_set1_epi32(cospi[55]); - __m128i cospi_p09 = _mm_set1_epi32(cospi[9]); - __m128i cospi_p23 = _mm_set1_epi32(cospi[23]); - __m128i cospi_p41 = _mm_set1_epi32(cospi[41]); - __m128i cospi_p39 = _mm_set1_epi32(cospi[39]); - __m128i cospi_p25 = _mm_set1_epi32(cospi[25]); - __m128i cospi_p07 = _mm_set1_epi32(cospi[7]); - __m128i cospi_p57 = _mm_set1_epi32(cospi[57]); - __m128i cospi_p59 = _mm_set1_epi32(cospi[59]); - __m128i cospi_p05 = _mm_set1_epi32(cospi[5]); - __m128i cospi_p27 = _mm_set1_epi32(cospi[27]); - __m128i cospi_p37 = _mm_set1_epi32(cospi[37]); - __m128i cospi_p43 = _mm_set1_epi32(cospi[43]); - __m128i cospi_p21 = _mm_set1_epi32(cospi[21]); - __m128i cospi_p11 = _mm_set1_epi32(cospi[11]); - __m128i cospi_p53 = _mm_set1_epi32(cospi[53]); - __m128i cospi_p51 = _mm_set1_epi32(cospi[51]); - __m128i cospi_p13 = _mm_set1_epi32(cospi[13]); - __m128i cospi_p19 = _mm_set1_epi32(cospi[19]); - __m128i cospi_p45 = _mm_set1_epi32(cospi[45]); - __m128i cospi_p35 = _mm_set1_epi32(cospi[35]); - __m128i cospi_p29 = _mm_set1_epi32(cospi[29]); - __m128i cospi_p03 = _mm_set1_epi32(cospi[3]); - __m128i cospi_p61 = _mm_set1_epi32(cospi[61]); - - // stage 1 - __m128i x1[64]; - x1[0] = _mm_add_epi32(input[0 * instride], input[63 * instride]); - x1[63] = _mm_sub_epi32(input[0 * instride], input[63 * instride]); - x1[1] = _mm_add_epi32(input[1 * instride], input[62 * instride]); - x1[62] = _mm_sub_epi32(input[1 * instride], input[62 * instride]); - x1[2] = _mm_add_epi32(input[2 * instride], input[61 * instride]); - x1[61] = _mm_sub_epi32(input[2 * 
instride], input[61 * instride]); - x1[3] = _mm_add_epi32(input[3 * instride], input[60 * instride]); - x1[60] = _mm_sub_epi32(input[3 * instride], input[60 * instride]); - x1[4] = _mm_add_epi32(input[4 * instride], input[59 * instride]); - x1[59] = _mm_sub_epi32(input[4 * instride], input[59 * instride]); - x1[5] = _mm_add_epi32(input[5 * instride], input[58 * instride]); - x1[58] = _mm_sub_epi32(input[5 * instride], input[58 * instride]); - x1[6] = _mm_add_epi32(input[6 * instride], input[57 * instride]); - x1[57] = _mm_sub_epi32(input[6 * instride], input[57 * instride]); - x1[7] = _mm_add_epi32(input[7 * instride], input[56 * instride]); - x1[56] = _mm_sub_epi32(input[7 * instride], input[56 * instride]); - x1[8] = _mm_add_epi32(input[8 * instride], input[55 * instride]); - x1[55] = _mm_sub_epi32(input[8 * instride], input[55 * instride]); - x1[9] = _mm_add_epi32(input[9 * instride], input[54 * instride]); - x1[54] = _mm_sub_epi32(input[9 * instride], input[54 * instride]); - x1[10] = _mm_add_epi32(input[10 * instride], input[53 * instride]); - x1[53] = _mm_sub_epi32(input[10 * instride], input[53 * instride]); - x1[11] = _mm_add_epi32(input[11 * instride], input[52 * instride]); - x1[52] = _mm_sub_epi32(input[11 * instride], input[52 * instride]); - x1[12] = _mm_add_epi32(input[12 * instride], input[51 * instride]); - x1[51] = _mm_sub_epi32(input[12 * instride], input[51 * instride]); - x1[13] = _mm_add_epi32(input[13 * instride], input[50 * instride]); - x1[50] = _mm_sub_epi32(input[13 * instride], input[50 * instride]); - x1[14] = _mm_add_epi32(input[14 * instride], input[49 * instride]); - x1[49] = _mm_sub_epi32(input[14 * instride], input[49 * instride]); - x1[15] = _mm_add_epi32(input[15 * instride], input[48 * instride]); - x1[48] = _mm_sub_epi32(input[15 * instride], input[48 * instride]); - x1[16] = _mm_add_epi32(input[16 * instride], input[47 * instride]); - x1[47] = _mm_sub_epi32(input[16 * instride], input[47 * instride]); - x1[17] = 
_mm_add_epi32(input[17 * instride], input[46 * instride]); - x1[46] = _mm_sub_epi32(input[17 * instride], input[46 * instride]); - x1[18] = _mm_add_epi32(input[18 * instride], input[45 * instride]); - x1[45] = _mm_sub_epi32(input[18 * instride], input[45 * instride]); - x1[19] = _mm_add_epi32(input[19 * instride], input[44 * instride]); - x1[44] = _mm_sub_epi32(input[19 * instride], input[44 * instride]); - x1[20] = _mm_add_epi32(input[20 * instride], input[43 * instride]); - x1[43] = _mm_sub_epi32(input[20 * instride], input[43 * instride]); - x1[21] = _mm_add_epi32(input[21 * instride], input[42 * instride]); - x1[42] = _mm_sub_epi32(input[21 * instride], input[42 * instride]); - x1[22] = _mm_add_epi32(input[22 * instride], input[41 * instride]); - x1[41] = _mm_sub_epi32(input[22 * instride], input[41 * instride]); - x1[23] = _mm_add_epi32(input[23 * instride], input[40 * instride]); - x1[40] = _mm_sub_epi32(input[23 * instride], input[40 * instride]); - x1[24] = _mm_add_epi32(input[24 * instride], input[39 * instride]); - x1[39] = _mm_sub_epi32(input[24 * instride], input[39 * instride]); - x1[25] = _mm_add_epi32(input[25 * instride], input[38 * instride]); - x1[38] = _mm_sub_epi32(input[25 * instride], input[38 * instride]); - x1[26] = _mm_add_epi32(input[26 * instride], input[37 * instride]); - x1[37] = _mm_sub_epi32(input[26 * instride], input[37 * instride]); - x1[27] = _mm_add_epi32(input[27 * instride], input[36 * instride]); - x1[36] = _mm_sub_epi32(input[27 * instride], input[36 * instride]); - x1[28] = _mm_add_epi32(input[28 * instride], input[35 * instride]); - x1[35] = _mm_sub_epi32(input[28 * instride], input[35 * instride]); - x1[29] = _mm_add_epi32(input[29 * instride], input[34 * instride]); - x1[34] = _mm_sub_epi32(input[29 * instride], input[34 * instride]); - x1[30] = _mm_add_epi32(input[30 * instride], input[33 * instride]); - x1[33] = _mm_sub_epi32(input[30 * instride], input[33 * instride]); - x1[31] = _mm_add_epi32(input[31 * instride], 
input[32 * instride]); - x1[32] = _mm_sub_epi32(input[31 * instride], input[32 * instride]); - - // stage 2 - __m128i x2[64]; - x2[0] = _mm_add_epi32(x1[0], x1[31]); - x2[31] = _mm_sub_epi32(x1[0], x1[31]); - x2[1] = _mm_add_epi32(x1[1], x1[30]); - x2[30] = _mm_sub_epi32(x1[1], x1[30]); - x2[2] = _mm_add_epi32(x1[2], x1[29]); - x2[29] = _mm_sub_epi32(x1[2], x1[29]); - x2[3] = _mm_add_epi32(x1[3], x1[28]); - x2[28] = _mm_sub_epi32(x1[3], x1[28]); - x2[4] = _mm_add_epi32(x1[4], x1[27]); - x2[27] = _mm_sub_epi32(x1[4], x1[27]); - x2[5] = _mm_add_epi32(x1[5], x1[26]); - x2[26] = _mm_sub_epi32(x1[5], x1[26]); - x2[6] = _mm_add_epi32(x1[6], x1[25]); - x2[25] = _mm_sub_epi32(x1[6], x1[25]); - x2[7] = _mm_add_epi32(x1[7], x1[24]); - x2[24] = _mm_sub_epi32(x1[7], x1[24]); - x2[8] = _mm_add_epi32(x1[8], x1[23]); - x2[23] = _mm_sub_epi32(x1[8], x1[23]); - x2[9] = _mm_add_epi32(x1[9], x1[22]); - x2[22] = _mm_sub_epi32(x1[9], x1[22]); - x2[10] = _mm_add_epi32(x1[10], x1[21]); - x2[21] = _mm_sub_epi32(x1[10], x1[21]); - x2[11] = _mm_add_epi32(x1[11], x1[20]); - x2[20] = _mm_sub_epi32(x1[11], x1[20]); - x2[12] = _mm_add_epi32(x1[12], x1[19]); - x2[19] = _mm_sub_epi32(x1[12], x1[19]); - x2[13] = _mm_add_epi32(x1[13], x1[18]); - x2[18] = _mm_sub_epi32(x1[13], x1[18]); - x2[14] = _mm_add_epi32(x1[14], x1[17]); - x2[17] = _mm_sub_epi32(x1[14], x1[17]); - x2[15] = _mm_add_epi32(x1[15], x1[16]); - x2[16] = _mm_sub_epi32(x1[15], x1[16]); - x2[32] = x1[32]; - x2[33] = x1[33]; - x2[34] = x1[34]; - x2[35] = x1[35]; - x2[36] = x1[36]; - x2[37] = x1[37]; - x2[38] = x1[38]; - x2[39] = x1[39]; - btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x1[40], x1[55], x2[40], x2[55], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x1[41], x1[54], x2[41], x2[54], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x1[42], x1[53], x2[42], x2[53], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x1[43], x1[52], x2[43], x2[52], - __rounding, 
cos_bit); - btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x1[44], x1[51], x2[44], x2[51], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x1[45], x1[50], x2[45], x2[50], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x1[46], x1[49], x2[46], x2[49], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x1[47], x1[48], x2[47], x2[48], - __rounding, cos_bit); - x2[56] = x1[56]; - x2[57] = x1[57]; - x2[58] = x1[58]; - x2[59] = x1[59]; - x2[60] = x1[60]; - x2[61] = x1[61]; - x2[62] = x1[62]; - x2[63] = x1[63]; - - // stage 3 - __m128i x3[64]; - x3[0] = _mm_add_epi32(x2[0], x2[15]); - x3[15] = _mm_sub_epi32(x2[0], x2[15]); - x3[1] = _mm_add_epi32(x2[1], x2[14]); - x3[14] = _mm_sub_epi32(x2[1], x2[14]); - x3[2] = _mm_add_epi32(x2[2], x2[13]); - x3[13] = _mm_sub_epi32(x2[2], x2[13]); - x3[3] = _mm_add_epi32(x2[3], x2[12]); - x3[12] = _mm_sub_epi32(x2[3], x2[12]); - x3[4] = _mm_add_epi32(x2[4], x2[11]); - x3[11] = _mm_sub_epi32(x2[4], x2[11]); - x3[5] = _mm_add_epi32(x2[5], x2[10]); - x3[10] = _mm_sub_epi32(x2[5], x2[10]); - x3[6] = _mm_add_epi32(x2[6], x2[9]); - x3[9] = _mm_sub_epi32(x2[6], x2[9]); - x3[7] = _mm_add_epi32(x2[7], x2[8]); - x3[8] = _mm_sub_epi32(x2[7], x2[8]); - x3[16] = x2[16]; - x3[17] = x2[17]; - x3[18] = x2[18]; - x3[19] = x2[19]; - btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x2[20], x2[27], x3[20], x3[27], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x2[21], x2[26], x3[21], x3[26], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x2[22], x2[25], x3[22], x3[25], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x2[23], x2[24], x3[23], x3[24], - __rounding, cos_bit); - x3[28] = x2[28]; - x3[29] = x2[29]; - x3[30] = x2[30]; - x3[31] = x2[31]; - x3[32] = _mm_add_epi32(x2[32], x2[47]); - x3[47] = _mm_sub_epi32(x2[32], x2[47]); - x3[33] = _mm_add_epi32(x2[33], x2[46]); - x3[46] = _mm_sub_epi32(x2[33], x2[46]); - x3[34] = 
_mm_add_epi32(x2[34], x2[45]); - x3[45] = _mm_sub_epi32(x2[34], x2[45]); - x3[35] = _mm_add_epi32(x2[35], x2[44]); - x3[44] = _mm_sub_epi32(x2[35], x2[44]); - x3[36] = _mm_add_epi32(x2[36], x2[43]); - x3[43] = _mm_sub_epi32(x2[36], x2[43]); - x3[37] = _mm_add_epi32(x2[37], x2[42]); - x3[42] = _mm_sub_epi32(x2[37], x2[42]); - x3[38] = _mm_add_epi32(x2[38], x2[41]); - x3[41] = _mm_sub_epi32(x2[38], x2[41]); - x3[39] = _mm_add_epi32(x2[39], x2[40]); - x3[40] = _mm_sub_epi32(x2[39], x2[40]); - x3[48] = _mm_sub_epi32(x2[63], x2[48]); - x3[63] = _mm_add_epi32(x2[63], x2[48]); - x3[49] = _mm_sub_epi32(x2[62], x2[49]); - x3[62] = _mm_add_epi32(x2[62], x2[49]); - x3[50] = _mm_sub_epi32(x2[61], x2[50]); - x3[61] = _mm_add_epi32(x2[61], x2[50]); - x3[51] = _mm_sub_epi32(x2[60], x2[51]); - x3[60] = _mm_add_epi32(x2[60], x2[51]); - x3[52] = _mm_sub_epi32(x2[59], x2[52]); - x3[59] = _mm_add_epi32(x2[59], x2[52]); - x3[53] = _mm_sub_epi32(x2[58], x2[53]); - x3[58] = _mm_add_epi32(x2[58], x2[53]); - x3[54] = _mm_sub_epi32(x2[57], x2[54]); - x3[57] = _mm_add_epi32(x2[57], x2[54]); - x3[55] = _mm_sub_epi32(x2[56], x2[55]); - x3[56] = _mm_add_epi32(x2[56], x2[55]); - - // stage 4 - __m128i x4[64]; - x4[0] = _mm_add_epi32(x3[0], x3[7]); - x4[7] = _mm_sub_epi32(x3[0], x3[7]); - x4[1] = _mm_add_epi32(x3[1], x3[6]); - x4[6] = _mm_sub_epi32(x3[1], x3[6]); - x4[2] = _mm_add_epi32(x3[2], x3[5]); - x4[5] = _mm_sub_epi32(x3[2], x3[5]); - x4[3] = _mm_add_epi32(x3[3], x3[4]); - x4[4] = _mm_sub_epi32(x3[3], x3[4]); - x4[8] = x3[8]; - x4[9] = x3[9]; - btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x3[10], x3[13], x4[10], x4[13], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x3[11], x3[12], x4[11], x4[12], - __rounding, cos_bit); - x4[14] = x3[14]; - x4[15] = x3[15]; - x4[16] = _mm_add_epi32(x3[16], x3[23]); - x4[23] = _mm_sub_epi32(x3[16], x3[23]); - x4[17] = _mm_add_epi32(x3[17], x3[22]); - x4[22] = _mm_sub_epi32(x3[17], x3[22]); - x4[18] = _mm_add_epi32(x3[18], x3[21]); 
- x4[21] = _mm_sub_epi32(x3[18], x3[21]); - x4[19] = _mm_add_epi32(x3[19], x3[20]); - x4[20] = _mm_sub_epi32(x3[19], x3[20]); - x4[24] = _mm_sub_epi32(x3[31], x3[24]); - x4[31] = _mm_add_epi32(x3[31], x3[24]); - x4[25] = _mm_sub_epi32(x3[30], x3[25]); - x4[30] = _mm_add_epi32(x3[30], x3[25]); - x4[26] = _mm_sub_epi32(x3[29], x3[26]); - x4[29] = _mm_add_epi32(x3[29], x3[26]); - x4[27] = _mm_sub_epi32(x3[28], x3[27]); - x4[28] = _mm_add_epi32(x3[28], x3[27]); - x4[32] = x3[32]; - x4[33] = x3[33]; - x4[34] = x3[34]; - x4[35] = x3[35]; - btf_32_type0_sse4_1_new(cospi_m16, cospi_p48, x3[36], x3[59], x4[36], x4[59], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m16, cospi_p48, x3[37], x3[58], x4[37], x4[58], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m16, cospi_p48, x3[38], x3[57], x4[38], x4[57], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m16, cospi_p48, x3[39], x3[56], x4[39], x4[56], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m48, cospi_m16, x3[40], x3[55], x4[40], x4[55], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m48, cospi_m16, x3[41], x3[54], x4[41], x4[54], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m48, cospi_m16, x3[42], x3[53], x4[42], x4[53], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m48, cospi_m16, x3[43], x3[52], x4[43], x4[52], - __rounding, cos_bit); - x4[44] = x3[44]; - x4[45] = x3[45]; - x4[46] = x3[46]; - x4[47] = x3[47]; - x4[48] = x3[48]; - x4[49] = x3[49]; - x4[50] = x3[50]; - x4[51] = x3[51]; - x4[60] = x3[60]; - x4[61] = x3[61]; - x4[62] = x3[62]; - x4[63] = x3[63]; - - // stage 5 - __m128i x5[64]; - x5[0] = _mm_add_epi32(x4[0], x4[3]); - x5[3] = _mm_sub_epi32(x4[0], x4[3]); - x5[1] = _mm_add_epi32(x4[1], x4[2]); - x5[2] = _mm_sub_epi32(x4[1], x4[2]); - x5[4] = x4[4]; - btf_32_type0_sse4_1_new(cospi_m32, cospi_p32, x4[5], x4[6], x5[5], x5[6], - __rounding, cos_bit); - x5[7] = x4[7]; - x5[8] = _mm_add_epi32(x4[8], x4[11]); - x5[11] = _mm_sub_epi32(x4[8], 
x4[11]); - x5[9] = _mm_add_epi32(x4[9], x4[10]); - x5[10] = _mm_sub_epi32(x4[9], x4[10]); - x5[12] = _mm_sub_epi32(x4[15], x4[12]); - x5[15] = _mm_add_epi32(x4[15], x4[12]); - x5[13] = _mm_sub_epi32(x4[14], x4[13]); - x5[14] = _mm_add_epi32(x4[14], x4[13]); - x5[16] = x4[16]; - x5[17] = x4[17]; - btf_32_type0_sse4_1_new(cospi_m16, cospi_p48, x4[18], x4[29], x5[18], x5[29], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m16, cospi_p48, x4[19], x4[28], x5[19], x5[28], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m48, cospi_m16, x4[20], x4[27], x5[20], x5[27], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m48, cospi_m16, x4[21], x4[26], x5[21], x5[26], - __rounding, cos_bit); - x5[22] = x4[22]; - x5[23] = x4[23]; - x5[24] = x4[24]; - x5[25] = x4[25]; - x5[30] = x4[30]; - x5[31] = x4[31]; - x5[32] = _mm_add_epi32(x4[32], x4[39]); - x5[39] = _mm_sub_epi32(x4[32], x4[39]); - x5[33] = _mm_add_epi32(x4[33], x4[38]); - x5[38] = _mm_sub_epi32(x4[33], x4[38]); - x5[34] = _mm_add_epi32(x4[34], x4[37]); - x5[37] = _mm_sub_epi32(x4[34], x4[37]); - x5[35] = _mm_add_epi32(x4[35], x4[36]); - x5[36] = _mm_sub_epi32(x4[35], x4[36]); - x5[40] = _mm_sub_epi32(x4[47], x4[40]); - x5[47] = _mm_add_epi32(x4[47], x4[40]); - x5[41] = _mm_sub_epi32(x4[46], x4[41]); - x5[46] = _mm_add_epi32(x4[46], x4[41]); - x5[42] = _mm_sub_epi32(x4[45], x4[42]); - x5[45] = _mm_add_epi32(x4[45], x4[42]); - x5[43] = _mm_sub_epi32(x4[44], x4[43]); - x5[44] = _mm_add_epi32(x4[44], x4[43]); - x5[48] = _mm_add_epi32(x4[48], x4[55]); - x5[55] = _mm_sub_epi32(x4[48], x4[55]); - x5[49] = _mm_add_epi32(x4[49], x4[54]); - x5[54] = _mm_sub_epi32(x4[49], x4[54]); - x5[50] = _mm_add_epi32(x4[50], x4[53]); - x5[53] = _mm_sub_epi32(x4[50], x4[53]); - x5[51] = _mm_add_epi32(x4[51], x4[52]); - x5[52] = _mm_sub_epi32(x4[51], x4[52]); - x5[56] = _mm_sub_epi32(x4[63], x4[56]); - x5[63] = _mm_add_epi32(x4[63], x4[56]); - x5[57] = _mm_sub_epi32(x4[62], x4[57]); - x5[62] = _mm_add_epi32(x4[62], 
x4[57]); - x5[58] = _mm_sub_epi32(x4[61], x4[58]); - x5[61] = _mm_add_epi32(x4[61], x4[58]); - x5[59] = _mm_sub_epi32(x4[60], x4[59]); - x5[60] = _mm_add_epi32(x4[60], x4[59]); - - // stage 6 - __m128i x6[64]; - btf_32_type0_sse4_1_new(cospi_p32, cospi_p32, x5[0], x5[1], x6[0], x6[1], - __rounding, cos_bit); - btf_32_type1_sse4_1_new(cospi_p48, cospi_p16, x5[2], x5[3], x6[2], x6[3], - __rounding, cos_bit); - x6[4] = _mm_add_epi32(x5[4], x5[5]); - x6[5] = _mm_sub_epi32(x5[4], x5[5]); - x6[6] = _mm_sub_epi32(x5[7], x5[6]); - x6[7] = _mm_add_epi32(x5[7], x5[6]); - x6[8] = x5[8]; - btf_32_type0_sse4_1_new(cospi_m16, cospi_p48, x5[9], x5[14], x6[9], x6[14], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m48, cospi_m16, x5[10], x5[13], x6[10], x6[13], - __rounding, cos_bit); - x6[11] = x5[11]; - x6[12] = x5[12]; - x6[15] = x5[15]; - x6[16] = _mm_add_epi32(x5[16], x5[19]); - x6[19] = _mm_sub_epi32(x5[16], x5[19]); - x6[17] = _mm_add_epi32(x5[17], x5[18]); - x6[18] = _mm_sub_epi32(x5[17], x5[18]); - x6[20] = _mm_sub_epi32(x5[23], x5[20]); - x6[23] = _mm_add_epi32(x5[23], x5[20]); - x6[21] = _mm_sub_epi32(x5[22], x5[21]); - x6[22] = _mm_add_epi32(x5[22], x5[21]); - x6[24] = _mm_add_epi32(x5[24], x5[27]); - x6[27] = _mm_sub_epi32(x5[24], x5[27]); - x6[25] = _mm_add_epi32(x5[25], x5[26]); - x6[26] = _mm_sub_epi32(x5[25], x5[26]); - x6[28] = _mm_sub_epi32(x5[31], x5[28]); - x6[31] = _mm_add_epi32(x5[31], x5[28]); - x6[29] = _mm_sub_epi32(x5[30], x5[29]); - x6[30] = _mm_add_epi32(x5[30], x5[29]); - x6[32] = x5[32]; - x6[33] = x5[33]; - btf_32_type0_sse4_1_new(cospi_m08, cospi_p56, x5[34], x5[61], x6[34], x6[61], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m08, cospi_p56, x5[35], x5[60], x6[35], x6[60], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m56, cospi_m08, x5[36], x5[59], x6[36], x6[59], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m56, cospi_m08, x5[37], x5[58], x6[37], x6[58], - __rounding, cos_bit); - x6[38] = x5[38]; - x6[39] 
= x5[39]; - x6[40] = x5[40]; - x6[41] = x5[41]; - btf_32_type0_sse4_1_new(cospi_m40, cospi_p24, x5[42], x5[53], x6[42], x6[53], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m40, cospi_p24, x5[43], x5[52], x6[43], x6[52], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m24, cospi_m40, x5[44], x5[51], x6[44], x6[51], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m24, cospi_m40, x5[45], x5[50], x6[45], x6[50], - __rounding, cos_bit); - x6[46] = x5[46]; - x6[47] = x5[47]; - x6[48] = x5[48]; - x6[49] = x5[49]; - x6[54] = x5[54]; - x6[55] = x5[55]; - x6[56] = x5[56]; - x6[57] = x5[57]; - x6[62] = x5[62]; - x6[63] = x5[63]; - - // stage 7 - __m128i x7[64]; - x7[0] = x6[0]; - x7[1] = x6[1]; - x7[2] = x6[2]; - x7[3] = x6[3]; - btf_32_type1_sse4_1_new(cospi_p56, cospi_p08, x6[4], x6[7], x7[4], x7[7], - __rounding, cos_bit); - btf_32_type1_sse4_1_new(cospi_p24, cospi_p40, x6[5], x6[6], x7[5], x7[6], - __rounding, cos_bit); - x7[8] = _mm_add_epi32(x6[8], x6[9]); - x7[9] = _mm_sub_epi32(x6[8], x6[9]); - x7[10] = _mm_sub_epi32(x6[11], x6[10]); - x7[11] = _mm_add_epi32(x6[11], x6[10]); - x7[12] = _mm_add_epi32(x6[12], x6[13]); - x7[13] = _mm_sub_epi32(x6[12], x6[13]); - x7[14] = _mm_sub_epi32(x6[15], x6[14]); - x7[15] = _mm_add_epi32(x6[15], x6[14]); - x7[16] = x6[16]; - btf_32_type0_sse4_1_new(cospi_m08, cospi_p56, x6[17], x6[30], x7[17], x7[30], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m56, cospi_m08, x6[18], x6[29], x7[18], x7[29], - __rounding, cos_bit); - x7[19] = x6[19]; - x7[20] = x6[20]; - btf_32_type0_sse4_1_new(cospi_m40, cospi_p24, x6[21], x6[26], x7[21], x7[26], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m24, cospi_m40, x6[22], x6[25], x7[22], x7[25], - __rounding, cos_bit); - x7[23] = x6[23]; - x7[24] = x6[24]; - x7[27] = x6[27]; - x7[28] = x6[28]; - x7[31] = x6[31]; - x7[32] = _mm_add_epi32(x6[32], x6[35]); - x7[35] = _mm_sub_epi32(x6[32], x6[35]); - x7[33] = _mm_add_epi32(x6[33], x6[34]); - x7[34] = 
_mm_sub_epi32(x6[33], x6[34]); - x7[36] = _mm_sub_epi32(x6[39], x6[36]); - x7[39] = _mm_add_epi32(x6[39], x6[36]); - x7[37] = _mm_sub_epi32(x6[38], x6[37]); - x7[38] = _mm_add_epi32(x6[38], x6[37]); - x7[40] = _mm_add_epi32(x6[40], x6[43]); - x7[43] = _mm_sub_epi32(x6[40], x6[43]); - x7[41] = _mm_add_epi32(x6[41], x6[42]); - x7[42] = _mm_sub_epi32(x6[41], x6[42]); - x7[44] = _mm_sub_epi32(x6[47], x6[44]); - x7[47] = _mm_add_epi32(x6[47], x6[44]); - x7[45] = _mm_sub_epi32(x6[46], x6[45]); - x7[46] = _mm_add_epi32(x6[46], x6[45]); - x7[48] = _mm_add_epi32(x6[48], x6[51]); - x7[51] = _mm_sub_epi32(x6[48], x6[51]); - x7[49] = _mm_add_epi32(x6[49], x6[50]); - x7[50] = _mm_sub_epi32(x6[49], x6[50]); - x7[52] = _mm_sub_epi32(x6[55], x6[52]); - x7[55] = _mm_add_epi32(x6[55], x6[52]); - x7[53] = _mm_sub_epi32(x6[54], x6[53]); - x7[54] = _mm_add_epi32(x6[54], x6[53]); - x7[56] = _mm_add_epi32(x6[56], x6[59]); - x7[59] = _mm_sub_epi32(x6[56], x6[59]); - x7[57] = _mm_add_epi32(x6[57], x6[58]); - x7[58] = _mm_sub_epi32(x6[57], x6[58]); - x7[60] = _mm_sub_epi32(x6[63], x6[60]); - x7[63] = _mm_add_epi32(x6[63], x6[60]); - x7[61] = _mm_sub_epi32(x6[62], x6[61]); - x7[62] = _mm_add_epi32(x6[62], x6[61]); - - // stage 8 - __m128i x8[64]; - x8[0] = x7[0]; - x8[1] = x7[1]; - x8[2] = x7[2]; - x8[3] = x7[3]; - x8[4] = x7[4]; - x8[5] = x7[5]; - x8[6] = x7[6]; - x8[7] = x7[7]; - btf_32_type1_sse4_1_new(cospi_p60, cospi_p04, x7[8], x7[15], x8[8], x8[15], - __rounding, cos_bit); - btf_32_type1_sse4_1_new(cospi_p28, cospi_p36, x7[9], x7[14], x8[9], x8[14], - __rounding, cos_bit); - btf_32_type1_sse4_1_new(cospi_p44, cospi_p20, x7[10], x7[13], x8[10], x8[13], - __rounding, cos_bit); - btf_32_type1_sse4_1_new(cospi_p12, cospi_p52, x7[11], x7[12], x8[11], x8[12], - __rounding, cos_bit); - x8[16] = _mm_add_epi32(x7[16], x7[17]); - x8[17] = _mm_sub_epi32(x7[16], x7[17]); - x8[18] = _mm_sub_epi32(x7[19], x7[18]); - x8[19] = _mm_add_epi32(x7[19], x7[18]); - x8[20] = _mm_add_epi32(x7[20], x7[21]); - 
x8[21] = _mm_sub_epi32(x7[20], x7[21]); - x8[22] = _mm_sub_epi32(x7[23], x7[22]); - x8[23] = _mm_add_epi32(x7[23], x7[22]); - x8[24] = _mm_add_epi32(x7[24], x7[25]); - x8[25] = _mm_sub_epi32(x7[24], x7[25]); - x8[26] = _mm_sub_epi32(x7[27], x7[26]); - x8[27] = _mm_add_epi32(x7[27], x7[26]); - x8[28] = _mm_add_epi32(x7[28], x7[29]); - x8[29] = _mm_sub_epi32(x7[28], x7[29]); - x8[30] = _mm_sub_epi32(x7[31], x7[30]); - x8[31] = _mm_add_epi32(x7[31], x7[30]); - x8[32] = x7[32]; - btf_32_type0_sse4_1_new(cospi_m04, cospi_p60, x7[33], x7[62], x8[33], x8[62], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m60, cospi_m04, x7[34], x7[61], x8[34], x8[61], - __rounding, cos_bit); - x8[35] = x7[35]; - x8[36] = x7[36]; - btf_32_type0_sse4_1_new(cospi_m36, cospi_p28, x7[37], x7[58], x8[37], x8[58], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m28, cospi_m36, x7[38], x7[57], x8[38], x8[57], - __rounding, cos_bit); - x8[39] = x7[39]; - x8[40] = x7[40]; - btf_32_type0_sse4_1_new(cospi_m20, cospi_p44, x7[41], x7[54], x8[41], x8[54], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m44, cospi_m20, x7[42], x7[53], x8[42], x8[53], - __rounding, cos_bit); - x8[43] = x7[43]; - x8[44] = x7[44]; - btf_32_type0_sse4_1_new(cospi_m52, cospi_p12, x7[45], x7[50], x8[45], x8[50], - __rounding, cos_bit); - btf_32_type0_sse4_1_new(cospi_m12, cospi_m52, x7[46], x7[49], x8[46], x8[49], - __rounding, cos_bit); - x8[47] = x7[47]; - x8[48] = x7[48]; - x8[51] = x7[51]; - x8[52] = x7[52]; - x8[55] = x7[55]; - x8[56] = x7[56]; - x8[59] = x7[59]; - x8[60] = x7[60]; - x8[63] = x7[63]; - - // stage 9 - __m128i x9[64]; - x9[0] = x8[0]; - x9[1] = x8[1]; - x9[2] = x8[2]; - x9[3] = x8[3]; - x9[4] = x8[4]; - x9[5] = x8[5]; - x9[6] = x8[6]; - x9[7] = x8[7]; - x9[8] = x8[8]; - x9[9] = x8[9]; - x9[10] = x8[10]; - x9[11] = x8[11]; - x9[12] = x8[12]; - x9[13] = x8[13]; - x9[14] = x8[14]; - x9[15] = x8[15]; - btf_32_type1_sse4_1_new(cospi_p62, cospi_p02, x8[16], x8[31], x9[16], x9[31], - 
__rounding, cos_bit); - btf_32_type1_sse4_1_new(cospi_p30, cospi_p34, x8[17], x8[30], x9[17], x9[30], - __rounding, cos_bit); - btf_32_type1_sse4_1_new(cospi_p46, cospi_p18, x8[18], x8[29], x9[18], x9[29], - __rounding, cos_bit); - btf_32_type1_sse4_1_new(cospi_p14, cospi_p50, x8[19], x8[28], x9[19], x9[28], - __rounding, cos_bit); - btf_32_type1_sse4_1_new(cospi_p54, cospi_p10, x8[20], x8[27], x9[20], x9[27], - __rounding, cos_bit); - btf_32_type1_sse4_1_new(cospi_p22, cospi_p42, x8[21], x8[26], x9[21], x9[26], - __rounding, cos_bit); - btf_32_type1_sse4_1_new(cospi_p38, cospi_p26, x8[22], x8[25], x9[22], x9[25], - __rounding, cos_bit); - btf_32_type1_sse4_1_new(cospi_p06, cospi_p58, x8[23], x8[24], x9[23], x9[24], - __rounding, cos_bit); - x9[32] = _mm_add_epi32(x8[32], x8[33]); - x9[33] = _mm_sub_epi32(x8[32], x8[33]); - x9[34] = _mm_sub_epi32(x8[35], x8[34]); - x9[35] = _mm_add_epi32(x8[35], x8[34]); - x9[36] = _mm_add_epi32(x8[36], x8[37]); - x9[37] = _mm_sub_epi32(x8[36], x8[37]); - x9[38] = _mm_sub_epi32(x8[39], x8[38]); - x9[39] = _mm_add_epi32(x8[39], x8[38]); - x9[40] = _mm_add_epi32(x8[40], x8[41]); - x9[41] = _mm_sub_epi32(x8[40], x8[41]); - x9[42] = _mm_sub_epi32(x8[43], x8[42]); - x9[43] = _mm_add_epi32(x8[43], x8[42]); - x9[44] = _mm_add_epi32(x8[44], x8[45]); - x9[45] = _mm_sub_epi32(x8[44], x8[45]); - x9[46] = _mm_sub_epi32(x8[47], x8[46]); - x9[47] = _mm_add_epi32(x8[47], x8[46]); - x9[48] = _mm_add_epi32(x8[48], x8[49]); - x9[49] = _mm_sub_epi32(x8[48], x8[49]); - x9[50] = _mm_sub_epi32(x8[51], x8[50]); - x9[51] = _mm_add_epi32(x8[51], x8[50]); - x9[52] = _mm_add_epi32(x8[52], x8[53]); - x9[53] = _mm_sub_epi32(x8[52], x8[53]); - x9[54] = _mm_sub_epi32(x8[55], x8[54]); - x9[55] = _mm_add_epi32(x8[55], x8[54]); - x9[56] = _mm_add_epi32(x8[56], x8[57]); - x9[57] = _mm_sub_epi32(x8[56], x8[57]); - x9[58] = _mm_sub_epi32(x8[59], x8[58]); - x9[59] = _mm_add_epi32(x8[59], x8[58]); - x9[60] = _mm_add_epi32(x8[60], x8[61]); - x9[61] = 
_mm_sub_epi32(x8[60], x8[61]); - x9[62] = _mm_sub_epi32(x8[63], x8[62]); - x9[63] = _mm_add_epi32(x8[63], x8[62]); - - // stage 10 - __m128i x10[64]; - x10[0] = x9[0]; - x10[1] = x9[1]; - x10[2] = x9[2]; - x10[3] = x9[3]; - x10[4] = x9[4]; - x10[5] = x9[5]; - x10[6] = x9[6]; - x10[7] = x9[7]; - x10[8] = x9[8]; - x10[9] = x9[9]; - x10[10] = x9[10]; - x10[11] = x9[11]; - x10[12] = x9[12]; - x10[13] = x9[13]; - x10[14] = x9[14]; - x10[15] = x9[15]; - x10[16] = x9[16]; - x10[17] = x9[17]; - x10[18] = x9[18]; - x10[19] = x9[19]; - x10[20] = x9[20]; - x10[21] = x9[21]; - x10[22] = x9[22]; - x10[23] = x9[23]; - x10[24] = x9[24]; - x10[25] = x9[25]; - x10[26] = x9[26]; - x10[27] = x9[27]; - x10[28] = x9[28]; - x10[29] = x9[29]; - x10[30] = x9[30]; - x10[31] = x9[31]; - btf_32_type1_sse4_1_new(cospi_p63, cospi_p01, x9[32], x9[63], x10[32], - x10[63], __rounding, cos_bit); - btf_32_type1_sse4_1_new(cospi_p31, cospi_p33, x9[33], x9[62], x10[33], - x10[62], __rounding, cos_bit); - btf_32_type1_sse4_1_new(cospi_p47, cospi_p17, x9[34], x9[61], x10[34], - x10[61], __rounding, cos_bit); - btf_32_type1_sse4_1_new(cospi_p15, cospi_p49, x9[35], x9[60], x10[35], - x10[60], __rounding, cos_bit); - btf_32_type1_sse4_1_new(cospi_p55, cospi_p09, x9[36], x9[59], x10[36], - x10[59], __rounding, cos_bit); - btf_32_type1_sse4_1_new(cospi_p23, cospi_p41, x9[37], x9[58], x10[37], - x10[58], __rounding, cos_bit); - btf_32_type1_sse4_1_new(cospi_p39, cospi_p25, x9[38], x9[57], x10[38], - x10[57], __rounding, cos_bit); - btf_32_type1_sse4_1_new(cospi_p07, cospi_p57, x9[39], x9[56], x10[39], - x10[56], __rounding, cos_bit); - btf_32_type1_sse4_1_new(cospi_p59, cospi_p05, x9[40], x9[55], x10[40], - x10[55], __rounding, cos_bit); - btf_32_type1_sse4_1_new(cospi_p27, cospi_p37, x9[41], x9[54], x10[41], - x10[54], __rounding, cos_bit); - btf_32_type1_sse4_1_new(cospi_p43, cospi_p21, x9[42], x9[53], x10[42], - x10[53], __rounding, cos_bit); - btf_32_type1_sse4_1_new(cospi_p11, cospi_p53, x9[43], x9[52], 
x10[43], - x10[52], __rounding, cos_bit); - btf_32_type1_sse4_1_new(cospi_p51, cospi_p13, x9[44], x9[51], x10[44], - x10[51], __rounding, cos_bit); - btf_32_type1_sse4_1_new(cospi_p19, cospi_p45, x9[45], x9[50], x10[45], - x10[50], __rounding, cos_bit); - btf_32_type1_sse4_1_new(cospi_p35, cospi_p29, x9[46], x9[49], x10[46], - x10[49], __rounding, cos_bit); - btf_32_type1_sse4_1_new(cospi_p03, cospi_p61, x9[47], x9[48], x10[47], - x10[48], __rounding, cos_bit); - - // stage 11 - output[0 * outstride] = x10[0]; - output[1 * outstride] = x10[32]; - output[2 * outstride] = x10[16]; - output[3 * outstride] = x10[48]; - output[4 * outstride] = x10[8]; - output[5 * outstride] = x10[40]; - output[6 * outstride] = x10[24]; - output[7 * outstride] = x10[56]; - output[8 * outstride] = x10[4]; - output[9 * outstride] = x10[36]; - output[10 * outstride] = x10[20]; - output[11 * outstride] = x10[52]; - output[12 * outstride] = x10[12]; - output[13 * outstride] = x10[44]; - output[14 * outstride] = x10[28]; - output[15 * outstride] = x10[60]; - output[16 * outstride] = x10[2]; - output[17 * outstride] = x10[34]; - output[18 * outstride] = x10[18]; - output[19 * outstride] = x10[50]; - output[20 * outstride] = x10[10]; - output[21 * outstride] = x10[42]; - output[22 * outstride] = x10[26]; - output[23 * outstride] = x10[58]; - output[24 * outstride] = x10[6]; - output[25 * outstride] = x10[38]; - output[26 * outstride] = x10[22]; - output[27 * outstride] = x10[54]; - output[28 * outstride] = x10[14]; - output[29 * outstride] = x10[46]; - output[30 * outstride] = x10[30]; - output[31 * outstride] = x10[62]; - output[32 * outstride] = x10[1]; - output[33 * outstride] = x10[33]; - output[34 * outstride] = x10[17]; - output[35 * outstride] = x10[49]; - output[36 * outstride] = x10[9]; - output[37 * outstride] = x10[41]; - output[38 * outstride] = x10[25]; - output[39 * outstride] = x10[57]; - output[40 * outstride] = x10[5]; - output[41 * outstride] = x10[37]; - output[42 * 
outstride] = x10[21]; - output[43 * outstride] = x10[53]; - output[44 * outstride] = x10[13]; - output[45 * outstride] = x10[45]; - output[46 * outstride] = x10[29]; - output[47 * outstride] = x10[61]; - output[48 * outstride] = x10[3]; - output[49 * outstride] = x10[35]; - output[50 * outstride] = x10[19]; - output[51 * outstride] = x10[51]; - output[52 * outstride] = x10[11]; - output[53 * outstride] = x10[43]; - output[54 * outstride] = x10[27]; - output[55 * outstride] = x10[59]; - output[56 * outstride] = x10[7]; - output[57 * outstride] = x10[39]; - output[58 * outstride] = x10[23]; - output[59 * outstride] = x10[55]; - output[60 * outstride] = x10[15]; - output[61 * outstride] = x10[47]; - output[62 * outstride] = x10[31]; - output[63 * outstride] = x10[63]; -} diff --git a/third_party/aom/av1/encoder/x86/av1_fwd_txfm2d_avx2.c b/third_party/aom/av1/encoder/x86/av1_fwd_txfm2d_avx2.c deleted file mode 100644 index 592462e20..000000000 --- a/third_party/aom/av1/encoder/x86/av1_fwd_txfm2d_avx2.c +++ /dev/null @@ -1,2068 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include "config/av1_rtcd.h" - -#include "av1/common/enums.h" -#include "av1/common/av1_txfm.h" -#include "av1/encoder/x86/av1_fwd_txfm_avx2.h" -#include "av1/common/x86/av1_txfm_sse2.h" -#include "av1/encoder/av1_fwd_txfm1d_cfg.h" -#include "av1/encoder/x86/av1_txfm1d_sse4.h" -#include "av1/encoder/x86/av1_fwd_txfm_sse2.h" -#include "aom_dsp/x86/txfm_common_avx2.h" - -static INLINE void fdct16x16_new_avx2(const __m256i *input, __m256i *output, - int8_t cos_bit) { - const int32_t *cospi = cospi_arr(cos_bit); - const __m256i _r = _mm256_set1_epi32(1 << (cos_bit - 1)); - - __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]); - __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]); - __m256i cospi_p32_m32 = pair_set_w16_epi16(cospi[32], -cospi[32]); - __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]); - __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]); - __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]); - __m256i cospi_p56_p08 = pair_set_w16_epi16(cospi[56], cospi[8]); - __m256i cospi_m08_p56 = pair_set_w16_epi16(-cospi[8], cospi[56]); - __m256i cospi_p24_p40 = pair_set_w16_epi16(cospi[24], cospi[40]); - __m256i cospi_m40_p24 = pair_set_w16_epi16(-cospi[40], cospi[24]); - __m256i cospi_p60_p04 = pair_set_w16_epi16(cospi[60], cospi[4]); - __m256i cospi_m04_p60 = pair_set_w16_epi16(-cospi[4], cospi[60]); - __m256i cospi_p28_p36 = pair_set_w16_epi16(cospi[28], cospi[36]); - __m256i cospi_m36_p28 = pair_set_w16_epi16(-cospi[36], cospi[28]); - __m256i cospi_p44_p20 = pair_set_w16_epi16(cospi[44], cospi[20]); - __m256i cospi_m20_p44 = pair_set_w16_epi16(-cospi[20], cospi[44]); - __m256i cospi_p12_p52 = pair_set_w16_epi16(cospi[12], cospi[52]); - __m256i cospi_m52_p12 = pair_set_w16_epi16(-cospi[52], cospi[12]); - - // stage 1 - __m256i x1[16]; - btf_16_adds_subs_out_avx2(&x1[0], &x1[15], input[0], input[15]); - btf_16_adds_subs_out_avx2(&x1[1], &x1[14], input[1], input[14]); - 
btf_16_adds_subs_out_avx2(&x1[2], &x1[13], input[2], input[13]); - btf_16_adds_subs_out_avx2(&x1[3], &x1[12], input[3], input[12]); - btf_16_adds_subs_out_avx2(&x1[4], &x1[11], input[4], input[11]); - btf_16_adds_subs_out_avx2(&x1[5], &x1[10], input[5], input[10]); - btf_16_adds_subs_out_avx2(&x1[6], &x1[9], input[6], input[9]); - btf_16_adds_subs_out_avx2(&x1[7], &x1[8], input[7], input[8]); - - // stage 2 - btf_16_adds_subs_avx2(&x1[0], &x1[7]); - btf_16_adds_subs_avx2(&x1[1], &x1[6]); - btf_16_adds_subs_avx2(&x1[2], &x1[5]); - btf_16_adds_subs_avx2(&x1[3], &x1[4]); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[10], &x1[13], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[11], &x1[12], _r, cos_bit); - - // stage 3 - btf_16_adds_subs_avx2(&x1[0], &x1[3]); - btf_16_adds_subs_avx2(&x1[1], &x1[2]); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[5], &x1[6], _r, cos_bit); - btf_16_adds_subs_avx2(&x1[8], &x1[11]); - btf_16_adds_subs_avx2(&x1[9], &x1[10]); - btf_16_adds_subs_avx2(&x1[15], &x1[12]); - btf_16_adds_subs_avx2(&x1[14], &x1[13]); - - // stage 4 - btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[0], &x1[1], _r, cos_bit); - btf_16_w16_avx2(cospi_p48_p16, cospi_m16_p48, &x1[2], &x1[3], _r, cos_bit); - btf_16_adds_subs_avx2(&x1[4], &x1[5]); - btf_16_adds_subs_avx2(&x1[7], &x1[6]); - btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[9], &x1[14], _r, cos_bit); - btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[10], &x1[13], _r, cos_bit); - - // stage 5 - btf_16_w16_avx2(cospi_p56_p08, cospi_m08_p56, &x1[4], &x1[7], _r, cos_bit); - btf_16_w16_avx2(cospi_p24_p40, cospi_m40_p24, &x1[5], &x1[6], _r, cos_bit); - btf_16_adds_subs_avx2(&x1[8], &x1[9]); - btf_16_adds_subs_avx2(&x1[11], &x1[10]); - btf_16_adds_subs_avx2(&x1[12], &x1[13]); - btf_16_adds_subs_avx2(&x1[15], &x1[14]); - - // stage 6 - btf_16_w16_avx2(cospi_p60_p04, cospi_m04_p60, &x1[8], &x1[15], _r, cos_bit); - btf_16_w16_avx2(cospi_p28_p36, cospi_m36_p28, &x1[9], &x1[14], _r, 
cos_bit); - btf_16_w16_avx2(cospi_p44_p20, cospi_m20_p44, &x1[10], &x1[13], _r, cos_bit); - btf_16_w16_avx2(cospi_p12_p52, cospi_m52_p12, &x1[11], &x1[12], _r, cos_bit); - - // stage 7 - output[0] = x1[0]; - output[1] = x1[8]; - output[2] = x1[4]; - output[3] = x1[12]; - output[4] = x1[2]; - output[5] = x1[10]; - output[6] = x1[6]; - output[7] = x1[14]; - output[8] = x1[1]; - output[9] = x1[9]; - output[10] = x1[5]; - output[11] = x1[13]; - output[12] = x1[3]; - output[13] = x1[11]; - output[14] = x1[7]; - output[15] = x1[15]; -} - -static INLINE void fdct16x32_new_avx2(const __m256i *input, __m256i *output, - int8_t cos_bit) { - const int32_t *cospi = cospi_arr(cos_bit); - const __m256i _r = _mm256_set1_epi32(1 << (cos_bit - 1)); - - __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]); - __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]); - __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]); - __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]); - __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]); - __m256i cospi_p32_m32 = pair_set_w16_epi16(cospi[32], -cospi[32]); - __m256i cospi_p56_p08 = pair_set_w16_epi16(cospi[56], cospi[8]); - __m256i cospi_m08_p56 = pair_set_w16_epi16(-cospi[8], cospi[56]); - __m256i cospi_p24_p40 = pair_set_w16_epi16(cospi[24], cospi[40]); - __m256i cospi_m40_p24 = pair_set_w16_epi16(-cospi[40], cospi[24]); - __m256i cospi_m56_m08 = pair_set_w16_epi16(-cospi[56], -cospi[8]); - __m256i cospi_m24_m40 = pair_set_w16_epi16(-cospi[24], -cospi[40]); - __m256i cospi_p60_p04 = pair_set_w16_epi16(cospi[60], cospi[4]); - __m256i cospi_m04_p60 = pair_set_w16_epi16(-cospi[4], cospi[60]); - __m256i cospi_p28_p36 = pair_set_w16_epi16(cospi[28], cospi[36]); - __m256i cospi_m36_p28 = pair_set_w16_epi16(-cospi[36], cospi[28]); - __m256i cospi_p44_p20 = pair_set_w16_epi16(cospi[44], cospi[20]); - __m256i cospi_m20_p44 = pair_set_w16_epi16(-cospi[20], cospi[44]); - __m256i cospi_p12_p52 
= pair_set_w16_epi16(cospi[12], cospi[52]); - __m256i cospi_m52_p12 = pair_set_w16_epi16(-cospi[52], cospi[12]); - __m256i cospi_p62_p02 = pair_set_w16_epi16(cospi[62], cospi[2]); - __m256i cospi_m02_p62 = pair_set_w16_epi16(-cospi[2], cospi[62]); - __m256i cospi_p30_p34 = pair_set_w16_epi16(cospi[30], cospi[34]); - __m256i cospi_m34_p30 = pair_set_w16_epi16(-cospi[34], cospi[30]); - __m256i cospi_p46_p18 = pair_set_w16_epi16(cospi[46], cospi[18]); - __m256i cospi_m18_p46 = pair_set_w16_epi16(-cospi[18], cospi[46]); - __m256i cospi_p14_p50 = pair_set_w16_epi16(cospi[14], cospi[50]); - __m256i cospi_m50_p14 = pair_set_w16_epi16(-cospi[50], cospi[14]); - __m256i cospi_p54_p10 = pair_set_w16_epi16(cospi[54], cospi[10]); - __m256i cospi_m10_p54 = pair_set_w16_epi16(-cospi[10], cospi[54]); - __m256i cospi_p22_p42 = pair_set_w16_epi16(cospi[22], cospi[42]); - __m256i cospi_m42_p22 = pair_set_w16_epi16(-cospi[42], cospi[22]); - __m256i cospi_p38_p26 = pair_set_w16_epi16(cospi[38], cospi[26]); - __m256i cospi_m26_p38 = pair_set_w16_epi16(-cospi[26], cospi[38]); - __m256i cospi_p06_p58 = pair_set_w16_epi16(cospi[6], cospi[58]); - __m256i cospi_m58_p06 = pair_set_w16_epi16(-cospi[58], cospi[6]); - - // stage 1 - __m256i x1[32]; - btf_16_adds_subs_out_avx2(&x1[0], &x1[31], input[0], input[31]); - btf_16_adds_subs_out_avx2(&x1[1], &x1[30], input[1], input[30]); - btf_16_adds_subs_out_avx2(&x1[2], &x1[29], input[2], input[29]); - btf_16_adds_subs_out_avx2(&x1[3], &x1[28], input[3], input[28]); - btf_16_adds_subs_out_avx2(&x1[4], &x1[27], input[4], input[27]); - btf_16_adds_subs_out_avx2(&x1[5], &x1[26], input[5], input[26]); - btf_16_adds_subs_out_avx2(&x1[6], &x1[25], input[6], input[25]); - btf_16_adds_subs_out_avx2(&x1[7], &x1[24], input[7], input[24]); - btf_16_adds_subs_out_avx2(&x1[8], &x1[23], input[8], input[23]); - btf_16_adds_subs_out_avx2(&x1[9], &x1[22], input[9], input[22]); - btf_16_adds_subs_out_avx2(&x1[10], &x1[21], input[10], input[21]); - 
btf_16_adds_subs_out_avx2(&x1[11], &x1[20], input[11], input[20]); - btf_16_adds_subs_out_avx2(&x1[12], &x1[19], input[12], input[19]); - btf_16_adds_subs_out_avx2(&x1[13], &x1[18], input[13], input[18]); - btf_16_adds_subs_out_avx2(&x1[14], &x1[17], input[14], input[17]); - btf_16_adds_subs_out_avx2(&x1[15], &x1[16], input[15], input[16]); - - // stage 2 - btf_16_adds_subs_avx2(&x1[0], &x1[15]); - btf_16_adds_subs_avx2(&x1[1], &x1[14]); - btf_16_adds_subs_avx2(&x1[2], &x1[13]); - btf_16_adds_subs_avx2(&x1[3], &x1[12]); - btf_16_adds_subs_avx2(&x1[4], &x1[11]); - btf_16_adds_subs_avx2(&x1[5], &x1[10]); - btf_16_adds_subs_avx2(&x1[6], &x1[9]); - btf_16_adds_subs_avx2(&x1[7], &x1[8]); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[20], &x1[27], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[21], &x1[26], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[22], &x1[25], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[23], &x1[24], _r, cos_bit); - - // stage 3 - btf_16_adds_subs_avx2(&x1[0], &x1[7]); - btf_16_adds_subs_avx2(&x1[1], &x1[6]); - btf_16_adds_subs_avx2(&x1[2], &x1[5]); - btf_16_adds_subs_avx2(&x1[3], &x1[4]); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[10], &x1[13], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[11], &x1[12], _r, cos_bit); - btf_16_adds_subs_avx2(&x1[16], &x1[23]); - btf_16_adds_subs_avx2(&x1[17], &x1[22]); - btf_16_adds_subs_avx2(&x1[18], &x1[21]); - btf_16_adds_subs_avx2(&x1[19], &x1[20]); - btf_16_adds_subs_avx2(&x1[31], &x1[24]); - btf_16_adds_subs_avx2(&x1[30], &x1[25]); - btf_16_adds_subs_avx2(&x1[29], &x1[26]); - btf_16_adds_subs_avx2(&x1[28], &x1[27]); - - // stage 4 - btf_16_adds_subs_avx2(&x1[0], &x1[3]); - btf_16_adds_subs_avx2(&x1[1], &x1[2]); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[5], &x1[6], _r, cos_bit); - btf_16_adds_subs_avx2(&x1[8], &x1[11]); - btf_16_adds_subs_avx2(&x1[9], &x1[10]); - btf_16_adds_subs_avx2(&x1[15], &x1[12]); - 
btf_16_adds_subs_avx2(&x1[14], &x1[13]); - btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[18], &x1[29], _r, cos_bit); - btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[19], &x1[28], _r, cos_bit); - btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[20], &x1[27], _r, cos_bit); - btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[21], &x1[26], _r, cos_bit); - - // stage 5 - btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[0], &x1[1], _r, cos_bit); - btf_16_w16_avx2(cospi_p48_p16, cospi_m16_p48, &x1[2], &x1[3], _r, cos_bit); - btf_16_adds_subs_avx2(&x1[4], &x1[5]); - btf_16_adds_subs_avx2(&x1[7], &x1[6]); - btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[9], &x1[14], _r, cos_bit); - btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[10], &x1[13], _r, cos_bit); - btf_16_adds_subs_avx2(&x1[16], &x1[19]); - btf_16_adds_subs_avx2(&x1[17], &x1[18]); - btf_16_adds_subs_avx2(&x1[23], &x1[20]); - btf_16_adds_subs_avx2(&x1[22], &x1[21]); - btf_16_adds_subs_avx2(&x1[24], &x1[27]); - btf_16_adds_subs_avx2(&x1[25], &x1[26]); - btf_16_adds_subs_avx2(&x1[31], &x1[28]); - btf_16_adds_subs_avx2(&x1[30], &x1[29]); - - // stage 6 - btf_16_w16_avx2(cospi_p56_p08, cospi_m08_p56, &x1[4], &x1[7], _r, cos_bit); - btf_16_w16_avx2(cospi_p24_p40, cospi_m40_p24, &x1[5], &x1[6], _r, cos_bit); - btf_16_adds_subs_avx2(&x1[8], &x1[9]); - btf_16_adds_subs_avx2(&x1[11], &x1[10]); - btf_16_adds_subs_avx2(&x1[12], &x1[13]); - btf_16_adds_subs_avx2(&x1[15], &x1[14]); - btf_16_w16_avx2(cospi_m08_p56, cospi_p56_p08, &x1[17], &x1[30], _r, cos_bit); - btf_16_w16_avx2(cospi_m56_m08, cospi_m08_p56, &x1[18], &x1[29], _r, cos_bit); - btf_16_w16_avx2(cospi_m40_p24, cospi_p24_p40, &x1[21], &x1[26], _r, cos_bit); - btf_16_w16_avx2(cospi_m24_m40, cospi_m40_p24, &x1[22], &x1[25], _r, cos_bit); - - // stage 7 - btf_16_w16_avx2(cospi_p60_p04, cospi_m04_p60, &x1[8], &x1[15], _r, cos_bit); - btf_16_w16_avx2(cospi_p28_p36, cospi_m36_p28, &x1[9], &x1[14], _r, cos_bit); - btf_16_w16_avx2(cospi_p44_p20, cospi_m20_p44, 
&x1[10], &x1[13], _r, cos_bit); - btf_16_w16_avx2(cospi_p12_p52, cospi_m52_p12, &x1[11], &x1[12], _r, cos_bit); - btf_16_adds_subs_avx2(&x1[16], &x1[17]); - btf_16_adds_subs_avx2(&x1[19], &x1[18]); - btf_16_adds_subs_avx2(&x1[20], &x1[21]); - btf_16_adds_subs_avx2(&x1[23], &x1[22]); - btf_16_adds_subs_avx2(&x1[24], &x1[25]); - btf_16_adds_subs_avx2(&x1[27], &x1[26]); - btf_16_adds_subs_avx2(&x1[28], &x1[29]); - btf_16_adds_subs_avx2(&x1[31], &x1[30]); - - // stage 8 - btf_16_w16_avx2(cospi_p62_p02, cospi_m02_p62, &x1[16], &x1[31], _r, cos_bit); - btf_16_w16_avx2(cospi_p30_p34, cospi_m34_p30, &x1[17], &x1[30], _r, cos_bit); - btf_16_w16_avx2(cospi_p46_p18, cospi_m18_p46, &x1[18], &x1[29], _r, cos_bit); - btf_16_w16_avx2(cospi_p14_p50, cospi_m50_p14, &x1[19], &x1[28], _r, cos_bit); - btf_16_w16_avx2(cospi_p54_p10, cospi_m10_p54, &x1[20], &x1[27], _r, cos_bit); - btf_16_w16_avx2(cospi_p22_p42, cospi_m42_p22, &x1[21], &x1[26], _r, cos_bit); - btf_16_w16_avx2(cospi_p38_p26, cospi_m26_p38, &x1[22], &x1[25], _r, cos_bit); - btf_16_w16_avx2(cospi_p06_p58, cospi_m58_p06, &x1[23], &x1[24], _r, cos_bit); - - // stage 9 - output[0] = x1[0]; - output[1] = x1[16]; - output[2] = x1[8]; - output[3] = x1[24]; - output[4] = x1[4]; - output[5] = x1[20]; - output[6] = x1[12]; - output[7] = x1[28]; - output[8] = x1[2]; - output[9] = x1[18]; - output[10] = x1[10]; - output[11] = x1[26]; - output[12] = x1[6]; - output[13] = x1[22]; - output[14] = x1[14]; - output[15] = x1[30]; - output[16] = x1[1]; - output[17] = x1[17]; - output[18] = x1[9]; - output[19] = x1[25]; - output[20] = x1[5]; - output[21] = x1[21]; - output[22] = x1[13]; - output[23] = x1[29]; - output[24] = x1[3]; - output[25] = x1[19]; - output[26] = x1[11]; - output[27] = x1[27]; - output[28] = x1[7]; - output[29] = x1[23]; - output[30] = x1[15]; - output[31] = x1[31]; -} - -static INLINE void fdct16x64_new_avx2(const __m256i *input, __m256i *output, - int8_t cos_bit) { - const int32_t *cospi = cospi_arr(cos_bit); - const 
__m256i _r = _mm256_set1_epi32(1 << (cos_bit - 1)); - - __m256i cospi_m32_p32 = pair_set_w16_epi16(-cospi[32], cospi[32]); - __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]); - __m256i cospi_m16_p48 = pair_set_w16_epi16(-cospi[16], cospi[48]); - __m256i cospi_p48_p16 = pair_set_w16_epi16(cospi[48], cospi[16]); - __m256i cospi_m48_m16 = pair_set_w16_epi16(-cospi[48], -cospi[16]); - __m256i cospi_p32_m32 = pair_set_w16_epi16(cospi[32], -cospi[32]); - __m256i cospi_m08_p56 = pair_set_w16_epi16(-cospi[8], cospi[56]); - __m256i cospi_p56_p08 = pair_set_w16_epi16(cospi[56], cospi[8]); - __m256i cospi_m56_m08 = pair_set_w16_epi16(-cospi[56], -cospi[8]); - __m256i cospi_m40_p24 = pair_set_w16_epi16(-cospi[40], cospi[24]); - __m256i cospi_p24_p40 = pair_set_w16_epi16(cospi[24], cospi[40]); - __m256i cospi_m24_m40 = pair_set_w16_epi16(-cospi[24], -cospi[40]); - __m256i cospi_p60_p04 = pair_set_w16_epi16(cospi[60], cospi[4]); - __m256i cospi_m04_p60 = pair_set_w16_epi16(-cospi[4], cospi[60]); - __m256i cospi_p28_p36 = pair_set_w16_epi16(cospi[28], cospi[36]); - __m256i cospi_m36_p28 = pair_set_w16_epi16(-cospi[36], cospi[28]); - __m256i cospi_p44_p20 = pair_set_w16_epi16(cospi[44], cospi[20]); - __m256i cospi_m20_p44 = pair_set_w16_epi16(-cospi[20], cospi[44]); - __m256i cospi_p12_p52 = pair_set_w16_epi16(cospi[12], cospi[52]); - __m256i cospi_m52_p12 = pair_set_w16_epi16(-cospi[52], cospi[12]); - __m256i cospi_m60_m04 = pair_set_w16_epi16(-cospi[60], -cospi[4]); - __m256i cospi_m28_m36 = pair_set_w16_epi16(-cospi[28], -cospi[36]); - __m256i cospi_m44_m20 = pair_set_w16_epi16(-cospi[44], -cospi[20]); - __m256i cospi_m12_m52 = pair_set_w16_epi16(-cospi[12], -cospi[52]); - __m256i cospi_p62_p02 = pair_set_w16_epi16(cospi[62], cospi[2]); - __m256i cospi_m02_p62 = pair_set_w16_epi16(-cospi[2], cospi[62]); - __m256i cospi_p30_p34 = pair_set_w16_epi16(cospi[30], cospi[34]); - __m256i cospi_m34_p30 = pair_set_w16_epi16(-cospi[34], cospi[30]); - __m256i cospi_p46_p18 
= pair_set_w16_epi16(cospi[46], cospi[18]); - __m256i cospi_m18_p46 = pair_set_w16_epi16(-cospi[18], cospi[46]); - __m256i cospi_p14_p50 = pair_set_w16_epi16(cospi[14], cospi[50]); - __m256i cospi_m50_p14 = pair_set_w16_epi16(-cospi[50], cospi[14]); - __m256i cospi_p54_p10 = pair_set_w16_epi16(cospi[54], cospi[10]); - __m256i cospi_m10_p54 = pair_set_w16_epi16(-cospi[10], cospi[54]); - __m256i cospi_p22_p42 = pair_set_w16_epi16(cospi[22], cospi[42]); - __m256i cospi_m42_p22 = pair_set_w16_epi16(-cospi[42], cospi[22]); - __m256i cospi_p38_p26 = pair_set_w16_epi16(cospi[38], cospi[26]); - __m256i cospi_m26_p38 = pair_set_w16_epi16(-cospi[26], cospi[38]); - __m256i cospi_p06_p58 = pair_set_w16_epi16(cospi[6], cospi[58]); - __m256i cospi_m58_p06 = pair_set_w16_epi16(-cospi[58], cospi[6]); - __m256i cospi_p63_p01 = pair_set_w16_epi16(cospi[63], cospi[1]); - __m256i cospi_m01_p63 = pair_set_w16_epi16(-cospi[1], cospi[63]); - __m256i cospi_p31_p33 = pair_set_w16_epi16(cospi[31], cospi[33]); - __m256i cospi_m33_p31 = pair_set_w16_epi16(-cospi[33], cospi[31]); - __m256i cospi_p47_p17 = pair_set_w16_epi16(cospi[47], cospi[17]); - __m256i cospi_m17_p47 = pair_set_w16_epi16(-cospi[17], cospi[47]); - __m256i cospi_p15_p49 = pair_set_w16_epi16(cospi[15], cospi[49]); - __m256i cospi_m49_p15 = pair_set_w16_epi16(-cospi[49], cospi[15]); - __m256i cospi_p55_p09 = pair_set_w16_epi16(cospi[55], cospi[9]); - __m256i cospi_m09_p55 = pair_set_w16_epi16(-cospi[9], cospi[55]); - __m256i cospi_p23_p41 = pair_set_w16_epi16(cospi[23], cospi[41]); - __m256i cospi_m41_p23 = pair_set_w16_epi16(-cospi[41], cospi[23]); - __m256i cospi_p39_p25 = pair_set_w16_epi16(cospi[39], cospi[25]); - __m256i cospi_m25_p39 = pair_set_w16_epi16(-cospi[25], cospi[39]); - __m256i cospi_p07_p57 = pair_set_w16_epi16(cospi[7], cospi[57]); - __m256i cospi_m57_p07 = pair_set_w16_epi16(-cospi[57], cospi[7]); - __m256i cospi_p59_p05 = pair_set_w16_epi16(cospi[59], cospi[5]); - __m256i cospi_m05_p59 = 
pair_set_w16_epi16(-cospi[5], cospi[59]); - __m256i cospi_p27_p37 = pair_set_w16_epi16(cospi[27], cospi[37]); - __m256i cospi_m37_p27 = pair_set_w16_epi16(-cospi[37], cospi[27]); - __m256i cospi_p43_p21 = pair_set_w16_epi16(cospi[43], cospi[21]); - __m256i cospi_m21_p43 = pair_set_w16_epi16(-cospi[21], cospi[43]); - __m256i cospi_p11_p53 = pair_set_w16_epi16(cospi[11], cospi[53]); - __m256i cospi_m53_p11 = pair_set_w16_epi16(-cospi[53], cospi[11]); - __m256i cospi_p51_p13 = pair_set_w16_epi16(cospi[51], cospi[13]); - __m256i cospi_m13_p51 = pair_set_w16_epi16(-cospi[13], cospi[51]); - __m256i cospi_p19_p45 = pair_set_w16_epi16(cospi[19], cospi[45]); - __m256i cospi_m45_p19 = pair_set_w16_epi16(-cospi[45], cospi[19]); - __m256i cospi_p35_p29 = pair_set_w16_epi16(cospi[35], cospi[29]); - __m256i cospi_m29_p35 = pair_set_w16_epi16(-cospi[29], cospi[35]); - __m256i cospi_p03_p61 = pair_set_w16_epi16(cospi[3], cospi[61]); - __m256i cospi_m61_p03 = pair_set_w16_epi16(-cospi[61], cospi[3]); - - // stage 1 - __m256i x1[64]; - btf_16_adds_subs_out_avx2(&x1[0], &x1[63], input[0], input[63]); - btf_16_adds_subs_out_avx2(&x1[1], &x1[62], input[1], input[62]); - btf_16_adds_subs_out_avx2(&x1[2], &x1[61], input[2], input[61]); - btf_16_adds_subs_out_avx2(&x1[3], &x1[60], input[3], input[60]); - btf_16_adds_subs_out_avx2(&x1[4], &x1[59], input[4], input[59]); - btf_16_adds_subs_out_avx2(&x1[5], &x1[58], input[5], input[58]); - btf_16_adds_subs_out_avx2(&x1[6], &x1[57], input[6], input[57]); - btf_16_adds_subs_out_avx2(&x1[7], &x1[56], input[7], input[56]); - btf_16_adds_subs_out_avx2(&x1[8], &x1[55], input[8], input[55]); - btf_16_adds_subs_out_avx2(&x1[9], &x1[54], input[9], input[54]); - btf_16_adds_subs_out_avx2(&x1[10], &x1[53], input[10], input[53]); - btf_16_adds_subs_out_avx2(&x1[11], &x1[52], input[11], input[52]); - btf_16_adds_subs_out_avx2(&x1[12], &x1[51], input[12], input[51]); - btf_16_adds_subs_out_avx2(&x1[13], &x1[50], input[13], input[50]); - 
btf_16_adds_subs_out_avx2(&x1[14], &x1[49], input[14], input[49]); - btf_16_adds_subs_out_avx2(&x1[15], &x1[48], input[15], input[48]); - btf_16_adds_subs_out_avx2(&x1[16], &x1[47], input[16], input[47]); - btf_16_adds_subs_out_avx2(&x1[17], &x1[46], input[17], input[46]); - btf_16_adds_subs_out_avx2(&x1[18], &x1[45], input[18], input[45]); - btf_16_adds_subs_out_avx2(&x1[19], &x1[44], input[19], input[44]); - btf_16_adds_subs_out_avx2(&x1[20], &x1[43], input[20], input[43]); - btf_16_adds_subs_out_avx2(&x1[21], &x1[42], input[21], input[42]); - btf_16_adds_subs_out_avx2(&x1[22], &x1[41], input[22], input[41]); - btf_16_adds_subs_out_avx2(&x1[23], &x1[40], input[23], input[40]); - btf_16_adds_subs_out_avx2(&x1[24], &x1[39], input[24], input[39]); - btf_16_adds_subs_out_avx2(&x1[25], &x1[38], input[25], input[38]); - btf_16_adds_subs_out_avx2(&x1[26], &x1[37], input[26], input[37]); - btf_16_adds_subs_out_avx2(&x1[27], &x1[36], input[27], input[36]); - btf_16_adds_subs_out_avx2(&x1[28], &x1[35], input[28], input[35]); - btf_16_adds_subs_out_avx2(&x1[29], &x1[34], input[29], input[34]); - btf_16_adds_subs_out_avx2(&x1[30], &x1[33], input[30], input[33]); - btf_16_adds_subs_out_avx2(&x1[31], &x1[32], input[31], input[32]); - - // stage 2 - btf_16_adds_subs_avx2(&x1[0], &x1[31]); - btf_16_adds_subs_avx2(&x1[1], &x1[30]); - btf_16_adds_subs_avx2(&x1[2], &x1[29]); - btf_16_adds_subs_avx2(&x1[3], &x1[28]); - btf_16_adds_subs_avx2(&x1[4], &x1[27]); - btf_16_adds_subs_avx2(&x1[5], &x1[26]); - btf_16_adds_subs_avx2(&x1[6], &x1[25]); - btf_16_adds_subs_avx2(&x1[7], &x1[24]); - btf_16_adds_subs_avx2(&x1[8], &x1[23]); - btf_16_adds_subs_avx2(&x1[9], &x1[22]); - btf_16_adds_subs_avx2(&x1[10], &x1[21]); - btf_16_adds_subs_avx2(&x1[11], &x1[20]); - btf_16_adds_subs_avx2(&x1[12], &x1[19]); - btf_16_adds_subs_avx2(&x1[13], &x1[18]); - btf_16_adds_subs_avx2(&x1[14], &x1[17]); - btf_16_adds_subs_avx2(&x1[15], &x1[16]); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[40], &x1[55], 
_r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[41], &x1[54], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[42], &x1[53], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[43], &x1[52], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[44], &x1[51], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[45], &x1[50], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[46], &x1[49], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[47], &x1[48], _r, cos_bit); - - // stage 3 - btf_16_adds_subs_avx2(&x1[0], &x1[15]); - btf_16_adds_subs_avx2(&x1[1], &x1[14]); - btf_16_adds_subs_avx2(&x1[2], &x1[13]); - btf_16_adds_subs_avx2(&x1[3], &x1[12]); - btf_16_adds_subs_avx2(&x1[4], &x1[11]); - btf_16_adds_subs_avx2(&x1[5], &x1[10]); - btf_16_adds_subs_avx2(&x1[6], &x1[9]); - btf_16_adds_subs_avx2(&x1[7], &x1[8]); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[20], &x1[27], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[21], &x1[26], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[22], &x1[25], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[23], &x1[24], _r, cos_bit); - btf_16_adds_subs_avx2(&x1[32], &x1[47]); - btf_16_adds_subs_avx2(&x1[33], &x1[46]); - btf_16_adds_subs_avx2(&x1[34], &x1[45]); - btf_16_adds_subs_avx2(&x1[35], &x1[44]); - btf_16_adds_subs_avx2(&x1[36], &x1[43]); - btf_16_adds_subs_avx2(&x1[37], &x1[42]); - btf_16_adds_subs_avx2(&x1[38], &x1[41]); - btf_16_adds_subs_avx2(&x1[39], &x1[40]); - btf_16_adds_subs_avx2(&x1[63], &x1[48]); - btf_16_adds_subs_avx2(&x1[62], &x1[49]); - btf_16_adds_subs_avx2(&x1[61], &x1[50]); - btf_16_adds_subs_avx2(&x1[60], &x1[51]); - btf_16_adds_subs_avx2(&x1[59], &x1[52]); - btf_16_adds_subs_avx2(&x1[58], &x1[53]); - btf_16_adds_subs_avx2(&x1[57], &x1[54]); - btf_16_adds_subs_avx2(&x1[56], &x1[55]); - - // stage 4 - btf_16_adds_subs_avx2(&x1[0], &x1[7]); - 
btf_16_adds_subs_avx2(&x1[1], &x1[6]); - btf_16_adds_subs_avx2(&x1[2], &x1[5]); - btf_16_adds_subs_avx2(&x1[3], &x1[4]); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[10], &x1[13], _r, cos_bit); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[11], &x1[12], _r, cos_bit); - btf_16_adds_subs_avx2(&x1[16], &x1[23]); - btf_16_adds_subs_avx2(&x1[17], &x1[22]); - btf_16_adds_subs_avx2(&x1[18], &x1[21]); - btf_16_adds_subs_avx2(&x1[19], &x1[20]); - btf_16_adds_subs_avx2(&x1[31], &x1[24]); - btf_16_adds_subs_avx2(&x1[30], &x1[25]); - btf_16_adds_subs_avx2(&x1[29], &x1[26]); - btf_16_adds_subs_avx2(&x1[28], &x1[27]); - btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[36], &x1[59], _r, cos_bit); - btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[37], &x1[58], _r, cos_bit); - btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[38], &x1[57], _r, cos_bit); - btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[39], &x1[56], _r, cos_bit); - btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[40], &x1[55], _r, cos_bit); - btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[41], &x1[54], _r, cos_bit); - btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[42], &x1[53], _r, cos_bit); - btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[43], &x1[52], _r, cos_bit); - - // stage 5 - btf_16_adds_subs_avx2(&x1[0], &x1[3]); - btf_16_adds_subs_avx2(&x1[1], &x1[2]); - btf_16_w16_avx2(cospi_m32_p32, cospi_p32_p32, &x1[5], &x1[6], _r, cos_bit); - btf_16_adds_subs_avx2(&x1[8], &x1[11]); - btf_16_adds_subs_avx2(&x1[9], &x1[10]); - btf_16_adds_subs_avx2(&x1[15], &x1[12]); - btf_16_adds_subs_avx2(&x1[14], &x1[13]); - btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[18], &x1[29], _r, cos_bit); - btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[19], &x1[28], _r, cos_bit); - btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[20], &x1[27], _r, cos_bit); - btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[21], &x1[26], _r, cos_bit); - btf_16_adds_subs_avx2(&x1[32], &x1[39]); - 
btf_16_adds_subs_avx2(&x1[33], &x1[38]); - btf_16_adds_subs_avx2(&x1[34], &x1[37]); - btf_16_adds_subs_avx2(&x1[35], &x1[36]); - btf_16_adds_subs_avx2(&x1[47], &x1[40]); - btf_16_adds_subs_avx2(&x1[46], &x1[41]); - btf_16_adds_subs_avx2(&x1[45], &x1[42]); - btf_16_adds_subs_avx2(&x1[44], &x1[43]); - btf_16_adds_subs_avx2(&x1[48], &x1[55]); - btf_16_adds_subs_avx2(&x1[49], &x1[54]); - btf_16_adds_subs_avx2(&x1[50], &x1[53]); - btf_16_adds_subs_avx2(&x1[51], &x1[52]); - btf_16_adds_subs_avx2(&x1[63], &x1[56]); - btf_16_adds_subs_avx2(&x1[62], &x1[57]); - btf_16_adds_subs_avx2(&x1[61], &x1[58]); - btf_16_adds_subs_avx2(&x1[60], &x1[59]); - - // stage 6 - btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[0], &x1[1], _r, cos_bit); - btf_16_w16_avx2(cospi_p48_p16, cospi_m16_p48, &x1[2], &x1[3], _r, cos_bit); - btf_16_adds_subs_avx2(&x1[4], &x1[5]); - btf_16_adds_subs_avx2(&x1[7], &x1[6]); - btf_16_w16_avx2(cospi_m16_p48, cospi_p48_p16, &x1[9], &x1[14], _r, cos_bit); - btf_16_w16_avx2(cospi_m48_m16, cospi_m16_p48, &x1[10], &x1[13], _r, cos_bit); - btf_16_adds_subs_avx2(&x1[16], &x1[19]); - btf_16_adds_subs_avx2(&x1[17], &x1[18]); - btf_16_adds_subs_avx2(&x1[23], &x1[20]); - btf_16_adds_subs_avx2(&x1[22], &x1[21]); - btf_16_adds_subs_avx2(&x1[24], &x1[27]); - btf_16_adds_subs_avx2(&x1[25], &x1[26]); - btf_16_adds_subs_avx2(&x1[31], &x1[28]); - btf_16_adds_subs_avx2(&x1[30], &x1[29]); - btf_16_w16_avx2(cospi_m08_p56, cospi_p56_p08, &x1[34], &x1[61], _r, cos_bit); - btf_16_w16_avx2(cospi_m08_p56, cospi_p56_p08, &x1[35], &x1[60], _r, cos_bit); - btf_16_w16_avx2(cospi_m56_m08, cospi_m08_p56, &x1[36], &x1[59], _r, cos_bit); - btf_16_w16_avx2(cospi_m56_m08, cospi_m08_p56, &x1[37], &x1[58], _r, cos_bit); - btf_16_w16_avx2(cospi_m40_p24, cospi_p24_p40, &x1[42], &x1[53], _r, cos_bit); - btf_16_w16_avx2(cospi_m40_p24, cospi_p24_p40, &x1[43], &x1[52], _r, cos_bit); - btf_16_w16_avx2(cospi_m24_m40, cospi_m40_p24, &x1[44], &x1[51], _r, cos_bit); - btf_16_w16_avx2(cospi_m24_m40, 
cospi_m40_p24, &x1[45], &x1[50], _r, cos_bit); - - // stage 7 - btf_16_w16_avx2(cospi_p56_p08, cospi_m08_p56, &x1[4], &x1[7], _r, cos_bit); - btf_16_w16_avx2(cospi_p24_p40, cospi_m40_p24, &x1[5], &x1[6], _r, cos_bit); - btf_16_adds_subs_avx2(&x1[8], &x1[9]); - btf_16_adds_subs_avx2(&x1[11], &x1[10]); - btf_16_adds_subs_avx2(&x1[12], &x1[13]); - btf_16_adds_subs_avx2(&x1[15], &x1[14]); - btf_16_w16_avx2(cospi_m08_p56, cospi_p56_p08, &x1[17], &x1[30], _r, cos_bit); - btf_16_w16_avx2(cospi_m56_m08, cospi_m08_p56, &x1[18], &x1[29], _r, cos_bit); - btf_16_w16_avx2(cospi_m40_p24, cospi_p24_p40, &x1[21], &x1[26], _r, cos_bit); - btf_16_w16_avx2(cospi_m24_m40, cospi_m40_p24, &x1[22], &x1[25], _r, cos_bit); - btf_16_adds_subs_avx2(&x1[32], &x1[35]); - btf_16_adds_subs_avx2(&x1[33], &x1[34]); - btf_16_adds_subs_avx2(&x1[39], &x1[36]); - btf_16_adds_subs_avx2(&x1[38], &x1[37]); - btf_16_adds_subs_avx2(&x1[40], &x1[43]); - btf_16_adds_subs_avx2(&x1[41], &x1[42]); - btf_16_adds_subs_avx2(&x1[47], &x1[44]); - btf_16_adds_subs_avx2(&x1[46], &x1[45]); - btf_16_adds_subs_avx2(&x1[48], &x1[51]); - btf_16_adds_subs_avx2(&x1[49], &x1[50]); - btf_16_adds_subs_avx2(&x1[55], &x1[52]); - btf_16_adds_subs_avx2(&x1[54], &x1[53]); - btf_16_adds_subs_avx2(&x1[56], &x1[59]); - btf_16_adds_subs_avx2(&x1[57], &x1[58]); - btf_16_adds_subs_avx2(&x1[63], &x1[60]); - btf_16_adds_subs_avx2(&x1[62], &x1[61]); - - // stage 8 - btf_16_w16_avx2(cospi_p60_p04, cospi_m04_p60, &x1[8], &x1[15], _r, cos_bit); - btf_16_w16_avx2(cospi_p28_p36, cospi_m36_p28, &x1[9], &x1[14], _r, cos_bit); - btf_16_w16_avx2(cospi_p44_p20, cospi_m20_p44, &x1[10], &x1[13], _r, cos_bit); - btf_16_w16_avx2(cospi_p12_p52, cospi_m52_p12, &x1[11], &x1[12], _r, cos_bit); - btf_16_adds_subs_avx2(&x1[16], &x1[17]); - btf_16_adds_subs_avx2(&x1[19], &x1[18]); - btf_16_adds_subs_avx2(&x1[20], &x1[21]); - btf_16_adds_subs_avx2(&x1[23], &x1[22]); - btf_16_adds_subs_avx2(&x1[24], &x1[25]); - btf_16_adds_subs_avx2(&x1[27], &x1[26]); - 
btf_16_adds_subs_avx2(&x1[28], &x1[29]); - btf_16_adds_subs_avx2(&x1[31], &x1[30]); - btf_16_w16_avx2(cospi_m04_p60, cospi_p60_p04, &x1[33], &x1[62], _r, cos_bit); - btf_16_w16_avx2(cospi_m60_m04, cospi_m04_p60, &x1[34], &x1[61], _r, cos_bit); - btf_16_w16_avx2(cospi_m36_p28, cospi_p28_p36, &x1[37], &x1[58], _r, cos_bit); - btf_16_w16_avx2(cospi_m28_m36, cospi_m36_p28, &x1[38], &x1[57], _r, cos_bit); - btf_16_w16_avx2(cospi_m20_p44, cospi_p44_p20, &x1[41], &x1[54], _r, cos_bit); - btf_16_w16_avx2(cospi_m44_m20, cospi_m20_p44, &x1[42], &x1[53], _r, cos_bit); - btf_16_w16_avx2(cospi_m52_p12, cospi_p12_p52, &x1[45], &x1[50], _r, cos_bit); - btf_16_w16_avx2(cospi_m12_m52, cospi_m52_p12, &x1[46], &x1[49], _r, cos_bit); - - // stage 9 - btf_16_w16_avx2(cospi_p62_p02, cospi_m02_p62, &x1[16], &x1[31], _r, cos_bit); - btf_16_w16_avx2(cospi_p30_p34, cospi_m34_p30, &x1[17], &x1[30], _r, cos_bit); - btf_16_w16_avx2(cospi_p46_p18, cospi_m18_p46, &x1[18], &x1[29], _r, cos_bit); - btf_16_w16_avx2(cospi_p14_p50, cospi_m50_p14, &x1[19], &x1[28], _r, cos_bit); - btf_16_w16_avx2(cospi_p54_p10, cospi_m10_p54, &x1[20], &x1[27], _r, cos_bit); - btf_16_w16_avx2(cospi_p22_p42, cospi_m42_p22, &x1[21], &x1[26], _r, cos_bit); - btf_16_w16_avx2(cospi_p38_p26, cospi_m26_p38, &x1[22], &x1[25], _r, cos_bit); - btf_16_w16_avx2(cospi_p06_p58, cospi_m58_p06, &x1[23], &x1[24], _r, cos_bit); - btf_16_adds_subs_avx2(&x1[32], &x1[33]); - btf_16_adds_subs_avx2(&x1[35], &x1[34]); - btf_16_adds_subs_avx2(&x1[36], &x1[37]); - btf_16_adds_subs_avx2(&x1[39], &x1[38]); - btf_16_adds_subs_avx2(&x1[40], &x1[41]); - btf_16_adds_subs_avx2(&x1[43], &x1[42]); - btf_16_adds_subs_avx2(&x1[44], &x1[45]); - btf_16_adds_subs_avx2(&x1[47], &x1[46]); - btf_16_adds_subs_avx2(&x1[48], &x1[49]); - btf_16_adds_subs_avx2(&x1[51], &x1[50]); - btf_16_adds_subs_avx2(&x1[52], &x1[53]); - btf_16_adds_subs_avx2(&x1[55], &x1[54]); - btf_16_adds_subs_avx2(&x1[56], &x1[57]); - btf_16_adds_subs_avx2(&x1[59], &x1[58]); - 
btf_16_adds_subs_avx2(&x1[60], &x1[61]); - btf_16_adds_subs_avx2(&x1[63], &x1[62]); - - // stage 10 - btf_16_w16_avx2(cospi_p63_p01, cospi_m01_p63, &x1[32], &x1[63], _r, cos_bit); - btf_16_w16_avx2(cospi_p31_p33, cospi_m33_p31, &x1[33], &x1[62], _r, cos_bit); - btf_16_w16_avx2(cospi_p47_p17, cospi_m17_p47, &x1[34], &x1[61], _r, cos_bit); - btf_16_w16_avx2(cospi_p15_p49, cospi_m49_p15, &x1[35], &x1[60], _r, cos_bit); - btf_16_w16_avx2(cospi_p55_p09, cospi_m09_p55, &x1[36], &x1[59], _r, cos_bit); - btf_16_w16_avx2(cospi_p23_p41, cospi_m41_p23, &x1[37], &x1[58], _r, cos_bit); - btf_16_w16_avx2(cospi_p39_p25, cospi_m25_p39, &x1[38], &x1[57], _r, cos_bit); - btf_16_w16_avx2(cospi_p07_p57, cospi_m57_p07, &x1[39], &x1[56], _r, cos_bit); - btf_16_w16_avx2(cospi_p59_p05, cospi_m05_p59, &x1[40], &x1[55], _r, cos_bit); - btf_16_w16_avx2(cospi_p27_p37, cospi_m37_p27, &x1[41], &x1[54], _r, cos_bit); - btf_16_w16_avx2(cospi_p43_p21, cospi_m21_p43, &x1[42], &x1[53], _r, cos_bit); - btf_16_w16_avx2(cospi_p11_p53, cospi_m53_p11, &x1[43], &x1[52], _r, cos_bit); - btf_16_w16_avx2(cospi_p51_p13, cospi_m13_p51, &x1[44], &x1[51], _r, cos_bit); - btf_16_w16_avx2(cospi_p19_p45, cospi_m45_p19, &x1[45], &x1[50], _r, cos_bit); - btf_16_w16_avx2(cospi_p35_p29, cospi_m29_p35, &x1[46], &x1[49], _r, cos_bit); - btf_16_w16_avx2(cospi_p03_p61, cospi_m61_p03, &x1[47], &x1[48], _r, cos_bit); - - // stage 11 - output[0] = x1[0]; - output[1] = x1[32]; - output[2] = x1[16]; - output[3] = x1[48]; - output[4] = x1[8]; - output[5] = x1[40]; - output[6] = x1[24]; - output[7] = x1[56]; - output[8] = x1[4]; - output[9] = x1[36]; - output[10] = x1[20]; - output[11] = x1[52]; - output[12] = x1[12]; - output[13] = x1[44]; - output[14] = x1[28]; - output[15] = x1[60]; - output[16] = x1[2]; - output[17] = x1[34]; - output[18] = x1[18]; - output[19] = x1[50]; - output[20] = x1[10]; - output[21] = x1[42]; - output[22] = x1[26]; - output[23] = x1[58]; - output[24] = x1[6]; - output[25] = x1[38]; - output[26] = 
x1[22]; - output[27] = x1[54]; - output[28] = x1[14]; - output[29] = x1[46]; - output[30] = x1[30]; - output[31] = x1[62]; - output[32] = x1[1]; - output[33] = x1[33]; - output[34] = x1[17]; - output[35] = x1[49]; - output[36] = x1[9]; - output[37] = x1[41]; - output[38] = x1[25]; - output[39] = x1[57]; - output[40] = x1[5]; - output[41] = x1[37]; - output[42] = x1[21]; - output[43] = x1[53]; - output[44] = x1[13]; - output[45] = x1[45]; - output[46] = x1[29]; - output[47] = x1[61]; - output[48] = x1[3]; - output[49] = x1[35]; - output[50] = x1[19]; - output[51] = x1[51]; - output[52] = x1[11]; - output[53] = x1[43]; - output[54] = x1[27]; - output[55] = x1[59]; - output[56] = x1[7]; - output[57] = x1[39]; - output[58] = x1[23]; - output[59] = x1[55]; - output[60] = x1[15]; - output[61] = x1[47]; - output[62] = x1[31]; - output[63] = x1[63]; -} - -static INLINE void av1_fdct32_new_avx2(const __m256i *input, __m256i *output, - int8_t cos_bit) { - __m256i x1[32]; - const int32_t *cospi = cospi_arr(cos_bit); - const __m256i _r = _mm256_set1_epi32(1 << (cos_bit - 1)); - // stage 0 - // stage 1 - btf_32_add_sub_out_avx2(&x1[0], &x1[31], input[0], input[31]); - btf_32_add_sub_out_avx2(&x1[1], &x1[30], input[1], input[30]); - btf_32_add_sub_out_avx2(&x1[2], &x1[29], input[2], input[29]); - btf_32_add_sub_out_avx2(&x1[3], &x1[28], input[3], input[28]); - btf_32_add_sub_out_avx2(&x1[4], &x1[27], input[4], input[27]); - btf_32_add_sub_out_avx2(&x1[5], &x1[26], input[5], input[26]); - btf_32_add_sub_out_avx2(&x1[6], &x1[25], input[6], input[25]); - btf_32_add_sub_out_avx2(&x1[7], &x1[24], input[7], input[24]); - btf_32_add_sub_out_avx2(&x1[8], &x1[23], input[8], input[23]); - btf_32_add_sub_out_avx2(&x1[9], &x1[22], input[9], input[22]); - btf_32_add_sub_out_avx2(&x1[10], &x1[21], input[10], input[21]); - btf_32_add_sub_out_avx2(&x1[11], &x1[20], input[11], input[20]); - btf_32_add_sub_out_avx2(&x1[12], &x1[19], input[12], input[19]); - btf_32_add_sub_out_avx2(&x1[13], 
&x1[18], input[13], input[18]); - btf_32_add_sub_out_avx2(&x1[14], &x1[17], input[14], input[17]); - btf_32_add_sub_out_avx2(&x1[15], &x1[16], input[15], input[16]); - - // stage 2 - btf_32_add_sub_avx2(&x1[0], &x1[15]); - btf_32_add_sub_avx2(&x1[1], &x1[14]); - btf_32_add_sub_avx2(&x1[2], &x1[13]); - btf_32_add_sub_avx2(&x1[3], &x1[12]); - btf_32_add_sub_avx2(&x1[4], &x1[11]); - btf_32_add_sub_avx2(&x1[5], &x1[10]); - btf_32_add_sub_avx2(&x1[6], &x1[9]); - btf_32_add_sub_avx2(&x1[7], &x1[8]); - btf_32_avx2_type0(-cospi[32], cospi[32], &x1[20], &x1[27], _r, cos_bit); - btf_32_avx2_type0(-cospi[32], cospi[32], &x1[21], &x1[26], _r, cos_bit); - btf_32_avx2_type0(-cospi[32], cospi[32], &x1[22], &x1[25], _r, cos_bit); - btf_32_avx2_type0(-cospi[32], cospi[32], &x1[23], &x1[24], _r, cos_bit); - - // stage 3 - btf_32_add_sub_avx2(&x1[0], &x1[7]); - btf_32_add_sub_avx2(&x1[1], &x1[6]); - btf_32_add_sub_avx2(&x1[2], &x1[5]); - btf_32_add_sub_avx2(&x1[3], &x1[4]); - btf_32_avx2_type0(-cospi[32], cospi[32], &x1[10], &x1[13], _r, cos_bit); - btf_32_avx2_type0(-cospi[32], cospi[32], &x1[11], &x1[12], _r, cos_bit); - btf_32_add_sub_avx2(&x1[16], &x1[23]); - btf_32_add_sub_avx2(&x1[17], &x1[22]); - btf_32_add_sub_avx2(&x1[18], &x1[21]); - btf_32_add_sub_avx2(&x1[19], &x1[20]); - btf_32_add_sub_avx2(&x1[31], &x1[24]); - btf_32_add_sub_avx2(&x1[30], &x1[25]); - btf_32_add_sub_avx2(&x1[29], &x1[26]); - btf_32_add_sub_avx2(&x1[28], &x1[27]); - - // stage 4 - btf_32_add_sub_avx2(&x1[0], &x1[3]); - btf_32_add_sub_avx2(&x1[1], &x1[2]); - btf_32_avx2_type0(-cospi[32], cospi[32], &x1[5], &x1[6], _r, cos_bit); - btf_32_add_sub_avx2(&x1[8], &x1[11]); - btf_32_add_sub_avx2(&x1[9], &x1[10]); - btf_32_add_sub_avx2(&x1[15], &x1[12]); - btf_32_add_sub_avx2(&x1[14], &x1[13]); - btf_32_avx2_type0(-cospi[16], cospi[48], &x1[18], &x1[29], _r, cos_bit); - btf_32_avx2_type0(-cospi[16], cospi[48], &x1[19], &x1[28], _r, cos_bit); - btf_32_avx2_type0(-cospi[48], -cospi[16], &x1[20], &x1[27], _r, 
cos_bit); - btf_32_avx2_type0(-cospi[48], -cospi[16], &x1[21], &x1[26], _r, cos_bit); - - // stage 5 - btf_32_avx2_type0(cospi[32], cospi[32], &x1[0], &x1[1], _r, cos_bit); - btf_32_avx2_type1(cospi[48], cospi[16], &x1[2], &x1[3], _r, cos_bit); - btf_32_add_sub_avx2(&x1[4], &x1[5]); - btf_32_add_sub_avx2(&x1[7], &x1[6]); - btf_32_avx2_type0(-cospi[16], cospi[48], &x1[9], &x1[14], _r, cos_bit); - btf_32_avx2_type0(-cospi[48], -cospi[16], &x1[10], &x1[13], _r, cos_bit); - btf_32_add_sub_avx2(&x1[16], &x1[19]); - btf_32_add_sub_avx2(&x1[17], &x1[18]); - btf_32_add_sub_avx2(&x1[23], &x1[20]); - btf_32_add_sub_avx2(&x1[22], &x1[21]); - btf_32_add_sub_avx2(&x1[24], &x1[27]); - btf_32_add_sub_avx2(&x1[25], &x1[26]); - btf_32_add_sub_avx2(&x1[31], &x1[28]); - btf_32_add_sub_avx2(&x1[30], &x1[29]); - - // stage 6 - btf_32_avx2_type1(cospi[56], cospi[8], &x1[4], &x1[7], _r, cos_bit); - btf_32_avx2_type1(cospi[24], cospi[40], &x1[5], &x1[6], _r, cos_bit); - btf_32_add_sub_avx2(&x1[8], &x1[9]); - btf_32_add_sub_avx2(&x1[11], &x1[10]); - btf_32_add_sub_avx2(&x1[12], &x1[13]); - btf_32_add_sub_avx2(&x1[15], &x1[14]); - btf_32_avx2_type0(-cospi[8], cospi[56], &x1[17], &x1[30], _r, cos_bit); - btf_32_avx2_type0(-cospi[56], -cospi[8], &x1[18], &x1[29], _r, cos_bit); - btf_32_avx2_type0(-cospi[40], cospi[24], &x1[21], &x1[26], _r, cos_bit); - btf_32_avx2_type0(-cospi[24], -cospi[40], &x1[22], &x1[25], _r, cos_bit); - - // stage 7 - btf_32_avx2_type1(cospi[60], cospi[4], &x1[8], &x1[15], _r, cos_bit); - btf_32_avx2_type1(cospi[28], cospi[36], &x1[9], &x1[14], _r, cos_bit); - btf_32_avx2_type1(cospi[44], cospi[20], &x1[10], &x1[13], _r, cos_bit); - btf_32_avx2_type1(cospi[12], cospi[52], &x1[11], &x1[12], _r, cos_bit); - btf_32_add_sub_avx2(&x1[16], &x1[17]); - btf_32_add_sub_avx2(&x1[19], &x1[18]); - btf_32_add_sub_avx2(&x1[20], &x1[21]); - btf_32_add_sub_avx2(&x1[23], &x1[22]); - btf_32_add_sub_avx2(&x1[24], &x1[25]); - btf_32_add_sub_avx2(&x1[27], &x1[26]); - 
btf_32_add_sub_avx2(&x1[28], &x1[29]); - btf_32_add_sub_avx2(&x1[31], &x1[30]); - - // stage 8 - btf_32_avx2_type1(cospi[62], cospi[2], &x1[16], &x1[31], _r, cos_bit); - btf_32_avx2_type1(cospi[30], cospi[34], &x1[17], &x1[30], _r, cos_bit); - btf_32_avx2_type1(cospi[46], cospi[18], &x1[18], &x1[29], _r, cos_bit); - btf_32_avx2_type1(cospi[14], cospi[50], &x1[19], &x1[28], _r, cos_bit); - btf_32_avx2_type1(cospi[54], cospi[10], &x1[20], &x1[27], _r, cos_bit); - btf_32_avx2_type1(cospi[22], cospi[42], &x1[21], &x1[26], _r, cos_bit); - btf_32_avx2_type1(cospi[38], cospi[26], &x1[22], &x1[25], _r, cos_bit); - btf_32_avx2_type1(cospi[6], cospi[58], &x1[23], &x1[24], _r, cos_bit); - - // stage 9 - output[0] = x1[0]; - output[1] = x1[16]; - output[2] = x1[8]; - output[3] = x1[24]; - output[4] = x1[4]; - output[5] = x1[20]; - output[6] = x1[12]; - output[7] = x1[28]; - output[8] = x1[2]; - output[9] = x1[18]; - output[10] = x1[10]; - output[11] = x1[26]; - output[12] = x1[6]; - output[13] = x1[22]; - output[14] = x1[14]; - output[15] = x1[30]; - output[16] = x1[1]; - output[17] = x1[17]; - output[18] = x1[9]; - output[19] = x1[25]; - output[20] = x1[5]; - output[21] = x1[21]; - output[22] = x1[13]; - output[23] = x1[29]; - output[24] = x1[3]; - output[25] = x1[19]; - output[26] = x1[11]; - output[27] = x1[27]; - output[28] = x1[7]; - output[29] = x1[23]; - output[30] = x1[15]; - output[31] = x1[31]; -} - -static INLINE void av1_fdct64_new_avx2(const __m256i *input, __m256i *output, - int8_t cos_bit) { - const int32_t *cospi = cospi_arr(cos_bit); - const __m256i _r = _mm256_set1_epi32(1 << (cos_bit - 1)); - - __m256i cospi_m32 = _mm256_set1_epi32(-cospi[32]); - __m256i cospi_p32 = _mm256_set1_epi32(cospi[32]); - __m256i cospi_m16 = _mm256_set1_epi32(-cospi[16]); - __m256i cospi_p48 = _mm256_set1_epi32(cospi[48]); - __m256i cospi_m48 = _mm256_set1_epi32(-cospi[48]); - __m256i cospi_p16 = _mm256_set1_epi32(cospi[16]); - __m256i cospi_m08 = _mm256_set1_epi32(-cospi[8]); - 
__m256i cospi_p56 = _mm256_set1_epi32(cospi[56]); - __m256i cospi_m56 = _mm256_set1_epi32(-cospi[56]); - __m256i cospi_m40 = _mm256_set1_epi32(-cospi[40]); - __m256i cospi_p24 = _mm256_set1_epi32(cospi[24]); - __m256i cospi_m24 = _mm256_set1_epi32(-cospi[24]); - __m256i cospi_p08 = _mm256_set1_epi32(cospi[8]); - __m256i cospi_p40 = _mm256_set1_epi32(cospi[40]); - __m256i cospi_p60 = _mm256_set1_epi32(cospi[60]); - __m256i cospi_p04 = _mm256_set1_epi32(cospi[4]); - __m256i cospi_p28 = _mm256_set1_epi32(cospi[28]); - __m256i cospi_p36 = _mm256_set1_epi32(cospi[36]); - __m256i cospi_p44 = _mm256_set1_epi32(cospi[44]); - __m256i cospi_p20 = _mm256_set1_epi32(cospi[20]); - __m256i cospi_p12 = _mm256_set1_epi32(cospi[12]); - __m256i cospi_p52 = _mm256_set1_epi32(cospi[52]); - __m256i cospi_m04 = _mm256_set1_epi32(-cospi[4]); - __m256i cospi_m60 = _mm256_set1_epi32(-cospi[60]); - __m256i cospi_m36 = _mm256_set1_epi32(-cospi[36]); - __m256i cospi_m28 = _mm256_set1_epi32(-cospi[28]); - __m256i cospi_m20 = _mm256_set1_epi32(-cospi[20]); - __m256i cospi_m44 = _mm256_set1_epi32(-cospi[44]); - __m256i cospi_m52 = _mm256_set1_epi32(-cospi[52]); - __m256i cospi_m12 = _mm256_set1_epi32(-cospi[12]); - __m256i cospi_p62 = _mm256_set1_epi32(cospi[62]); - __m256i cospi_p02 = _mm256_set1_epi32(cospi[2]); - __m256i cospi_p30 = _mm256_set1_epi32(cospi[30]); - __m256i cospi_p34 = _mm256_set1_epi32(cospi[34]); - __m256i cospi_p46 = _mm256_set1_epi32(cospi[46]); - __m256i cospi_p18 = _mm256_set1_epi32(cospi[18]); - __m256i cospi_p14 = _mm256_set1_epi32(cospi[14]); - __m256i cospi_p50 = _mm256_set1_epi32(cospi[50]); - __m256i cospi_p54 = _mm256_set1_epi32(cospi[54]); - __m256i cospi_p10 = _mm256_set1_epi32(cospi[10]); - __m256i cospi_p22 = _mm256_set1_epi32(cospi[22]); - __m256i cospi_p42 = _mm256_set1_epi32(cospi[42]); - __m256i cospi_p38 = _mm256_set1_epi32(cospi[38]); - __m256i cospi_p26 = _mm256_set1_epi32(cospi[26]); - __m256i cospi_p06 = _mm256_set1_epi32(cospi[6]); - __m256i cospi_p58 
= _mm256_set1_epi32(cospi[58]); - __m256i cospi_p63 = _mm256_set1_epi32(cospi[63]); - __m256i cospi_p01 = _mm256_set1_epi32(cospi[1]); - __m256i cospi_p31 = _mm256_set1_epi32(cospi[31]); - __m256i cospi_p33 = _mm256_set1_epi32(cospi[33]); - __m256i cospi_p47 = _mm256_set1_epi32(cospi[47]); - __m256i cospi_p17 = _mm256_set1_epi32(cospi[17]); - __m256i cospi_p15 = _mm256_set1_epi32(cospi[15]); - __m256i cospi_p49 = _mm256_set1_epi32(cospi[49]); - __m256i cospi_p55 = _mm256_set1_epi32(cospi[55]); - __m256i cospi_p09 = _mm256_set1_epi32(cospi[9]); - __m256i cospi_p23 = _mm256_set1_epi32(cospi[23]); - __m256i cospi_p41 = _mm256_set1_epi32(cospi[41]); - __m256i cospi_p39 = _mm256_set1_epi32(cospi[39]); - __m256i cospi_p25 = _mm256_set1_epi32(cospi[25]); - __m256i cospi_p07 = _mm256_set1_epi32(cospi[7]); - __m256i cospi_p57 = _mm256_set1_epi32(cospi[57]); - __m256i cospi_p59 = _mm256_set1_epi32(cospi[59]); - __m256i cospi_p05 = _mm256_set1_epi32(cospi[5]); - __m256i cospi_p27 = _mm256_set1_epi32(cospi[27]); - __m256i cospi_p37 = _mm256_set1_epi32(cospi[37]); - __m256i cospi_p43 = _mm256_set1_epi32(cospi[43]); - __m256i cospi_p21 = _mm256_set1_epi32(cospi[21]); - __m256i cospi_p11 = _mm256_set1_epi32(cospi[11]); - __m256i cospi_p53 = _mm256_set1_epi32(cospi[53]); - __m256i cospi_p51 = _mm256_set1_epi32(cospi[51]); - __m256i cospi_p13 = _mm256_set1_epi32(cospi[13]); - __m256i cospi_p19 = _mm256_set1_epi32(cospi[19]); - __m256i cospi_p45 = _mm256_set1_epi32(cospi[45]); - __m256i cospi_p35 = _mm256_set1_epi32(cospi[35]); - __m256i cospi_p29 = _mm256_set1_epi32(cospi[29]); - __m256i cospi_p03 = _mm256_set1_epi32(cospi[3]); - __m256i cospi_p61 = _mm256_set1_epi32(cospi[61]); - - // stage 1 - __m256i x1[64]; - btf_32_add_sub_out_avx2(&x1[0], &x1[63], input[0], input[63]); - btf_32_add_sub_out_avx2(&x1[1], &x1[62], input[1], input[62]); - btf_32_add_sub_out_avx2(&x1[2], &x1[61], input[2], input[61]); - btf_32_add_sub_out_avx2(&x1[3], &x1[60], input[3], input[60]); - 
btf_32_add_sub_out_avx2(&x1[4], &x1[59], input[4], input[59]); - btf_32_add_sub_out_avx2(&x1[5], &x1[58], input[5], input[58]); - btf_32_add_sub_out_avx2(&x1[6], &x1[57], input[6], input[57]); - btf_32_add_sub_out_avx2(&x1[7], &x1[56], input[7], input[56]); - btf_32_add_sub_out_avx2(&x1[8], &x1[55], input[8], input[55]); - btf_32_add_sub_out_avx2(&x1[9], &x1[54], input[9], input[54]); - btf_32_add_sub_out_avx2(&x1[10], &x1[53], input[10], input[53]); - btf_32_add_sub_out_avx2(&x1[11], &x1[52], input[11], input[52]); - btf_32_add_sub_out_avx2(&x1[12], &x1[51], input[12], input[51]); - btf_32_add_sub_out_avx2(&x1[13], &x1[50], input[13], input[50]); - btf_32_add_sub_out_avx2(&x1[14], &x1[49], input[14], input[49]); - btf_32_add_sub_out_avx2(&x1[15], &x1[48], input[15], input[48]); - btf_32_add_sub_out_avx2(&x1[16], &x1[47], input[16], input[47]); - btf_32_add_sub_out_avx2(&x1[17], &x1[46], input[17], input[46]); - btf_32_add_sub_out_avx2(&x1[18], &x1[45], input[18], input[45]); - btf_32_add_sub_out_avx2(&x1[19], &x1[44], input[19], input[44]); - btf_32_add_sub_out_avx2(&x1[20], &x1[43], input[20], input[43]); - btf_32_add_sub_out_avx2(&x1[21], &x1[42], input[21], input[42]); - btf_32_add_sub_out_avx2(&x1[22], &x1[41], input[22], input[41]); - btf_32_add_sub_out_avx2(&x1[23], &x1[40], input[23], input[40]); - btf_32_add_sub_out_avx2(&x1[24], &x1[39], input[24], input[39]); - btf_32_add_sub_out_avx2(&x1[25], &x1[38], input[25], input[38]); - btf_32_add_sub_out_avx2(&x1[26], &x1[37], input[26], input[37]); - btf_32_add_sub_out_avx2(&x1[27], &x1[36], input[27], input[36]); - btf_32_add_sub_out_avx2(&x1[28], &x1[35], input[28], input[35]); - btf_32_add_sub_out_avx2(&x1[29], &x1[34], input[29], input[34]); - btf_32_add_sub_out_avx2(&x1[30], &x1[33], input[30], input[33]); - btf_32_add_sub_out_avx2(&x1[31], &x1[32], input[31], input[32]); - - // stage 2 - btf_32_add_sub_avx2(&x1[0], &x1[31]); - btf_32_add_sub_avx2(&x1[1], &x1[30]); - btf_32_add_sub_avx2(&x1[2], &x1[29]); - 
btf_32_add_sub_avx2(&x1[3], &x1[28]); - btf_32_add_sub_avx2(&x1[4], &x1[27]); - btf_32_add_sub_avx2(&x1[5], &x1[26]); - btf_32_add_sub_avx2(&x1[6], &x1[25]); - btf_32_add_sub_avx2(&x1[7], &x1[24]); - btf_32_add_sub_avx2(&x1[8], &x1[23]); - btf_32_add_sub_avx2(&x1[9], &x1[22]); - btf_32_add_sub_avx2(&x1[10], &x1[21]); - btf_32_add_sub_avx2(&x1[11], &x1[20]); - btf_32_add_sub_avx2(&x1[12], &x1[19]); - btf_32_add_sub_avx2(&x1[13], &x1[18]); - btf_32_add_sub_avx2(&x1[14], &x1[17]); - btf_32_add_sub_avx2(&x1[15], &x1[16]); - btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[40], &x1[55], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[41], &x1[54], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[42], &x1[53], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[43], &x1[52], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[44], &x1[51], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[45], &x1[50], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[46], &x1[49], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[47], &x1[48], _r, cos_bit); - - // stage 3 - btf_32_add_sub_avx2(&x1[0], &x1[15]); - btf_32_add_sub_avx2(&x1[1], &x1[14]); - btf_32_add_sub_avx2(&x1[2], &x1[13]); - btf_32_add_sub_avx2(&x1[3], &x1[12]); - btf_32_add_sub_avx2(&x1[4], &x1[11]); - btf_32_add_sub_avx2(&x1[5], &x1[10]); - btf_32_add_sub_avx2(&x1[6], &x1[9]); - btf_32_add_sub_avx2(&x1[7], &x1[8]); - btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[20], &x1[27], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[21], &x1[26], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[22], &x1[25], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[23], &x1[24], _r, cos_bit); - btf_32_add_sub_avx2(&x1[32], &x1[47]); - btf_32_add_sub_avx2(&x1[33], &x1[46]); - btf_32_add_sub_avx2(&x1[34], &x1[45]); - btf_32_add_sub_avx2(&x1[35], &x1[44]); - btf_32_add_sub_avx2(&x1[36], &x1[43]); - 
btf_32_add_sub_avx2(&x1[37], &x1[42]); - btf_32_add_sub_avx2(&x1[38], &x1[41]); - btf_32_add_sub_avx2(&x1[39], &x1[40]); - btf_32_add_sub_avx2(&x1[63], &x1[48]); - btf_32_add_sub_avx2(&x1[62], &x1[49]); - btf_32_add_sub_avx2(&x1[61], &x1[50]); - btf_32_add_sub_avx2(&x1[60], &x1[51]); - btf_32_add_sub_avx2(&x1[59], &x1[52]); - btf_32_add_sub_avx2(&x1[58], &x1[53]); - btf_32_add_sub_avx2(&x1[57], &x1[54]); - btf_32_add_sub_avx2(&x1[56], &x1[55]); - - // stage 4 - btf_32_add_sub_avx2(&x1[0], &x1[7]); - btf_32_add_sub_avx2(&x1[1], &x1[6]); - btf_32_add_sub_avx2(&x1[2], &x1[5]); - btf_32_add_sub_avx2(&x1[3], &x1[4]); - btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[10], &x1[13], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[11], &x1[12], _r, cos_bit); - btf_32_add_sub_avx2(&x1[16], &x1[23]); - btf_32_add_sub_avx2(&x1[17], &x1[22]); - btf_32_add_sub_avx2(&x1[18], &x1[21]); - btf_32_add_sub_avx2(&x1[19], &x1[20]); - btf_32_add_sub_avx2(&x1[31], &x1[24]); - btf_32_add_sub_avx2(&x1[30], &x1[25]); - btf_32_add_sub_avx2(&x1[29], &x1[26]); - btf_32_add_sub_avx2(&x1[28], &x1[27]); - btf_32_avx2_type0_new(cospi_m16, cospi_p48, &x1[36], &x1[59], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m16, cospi_p48, &x1[37], &x1[58], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m16, cospi_p48, &x1[38], &x1[57], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m16, cospi_p48, &x1[39], &x1[56], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m48, cospi_m16, &x1[40], &x1[55], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m48, cospi_m16, &x1[41], &x1[54], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m48, cospi_m16, &x1[42], &x1[53], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m48, cospi_m16, &x1[43], &x1[52], _r, cos_bit); - - // stage 5 - btf_32_add_sub_avx2(&x1[0], &x1[3]); - btf_32_add_sub_avx2(&x1[1], &x1[2]); - btf_32_avx2_type0_new(cospi_m32, cospi_p32, &x1[5], &x1[6], _r, cos_bit); - btf_32_add_sub_avx2(&x1[8], &x1[11]); - btf_32_add_sub_avx2(&x1[9], &x1[10]); - 
btf_32_add_sub_avx2(&x1[15], &x1[12]); - btf_32_add_sub_avx2(&x1[14], &x1[13]); - btf_32_avx2_type0_new(cospi_m16, cospi_p48, &x1[18], &x1[29], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m16, cospi_p48, &x1[19], &x1[28], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m48, cospi_m16, &x1[20], &x1[27], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m48, cospi_m16, &x1[21], &x1[26], _r, cos_bit); - btf_32_add_sub_avx2(&x1[32], &x1[39]); - btf_32_add_sub_avx2(&x1[33], &x1[38]); - btf_32_add_sub_avx2(&x1[34], &x1[37]); - btf_32_add_sub_avx2(&x1[35], &x1[36]); - btf_32_add_sub_avx2(&x1[47], &x1[40]); - btf_32_add_sub_avx2(&x1[46], &x1[41]); - btf_32_add_sub_avx2(&x1[45], &x1[42]); - btf_32_add_sub_avx2(&x1[44], &x1[43]); - btf_32_add_sub_avx2(&x1[48], &x1[55]); - btf_32_add_sub_avx2(&x1[49], &x1[54]); - btf_32_add_sub_avx2(&x1[50], &x1[53]); - btf_32_add_sub_avx2(&x1[51], &x1[52]); - btf_32_add_sub_avx2(&x1[63], &x1[56]); - btf_32_add_sub_avx2(&x1[62], &x1[57]); - btf_32_add_sub_avx2(&x1[61], &x1[58]); - btf_32_add_sub_avx2(&x1[60], &x1[59]); - - // stage 6 - btf_32_avx2_type0_new(cospi_p32, cospi_p32, &x1[0], &x1[1], _r, cos_bit); - btf_32_avx2_type1_new(cospi_p48, cospi_p16, &x1[2], &x1[3], _r, cos_bit); - btf_32_add_sub_avx2(&x1[4], &x1[5]); - btf_32_add_sub_avx2(&x1[7], &x1[6]); - btf_32_avx2_type0_new(cospi_m16, cospi_p48, &x1[9], &x1[14], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m48, cospi_m16, &x1[10], &x1[13], _r, cos_bit); - btf_32_add_sub_avx2(&x1[16], &x1[19]); - btf_32_add_sub_avx2(&x1[17], &x1[18]); - btf_32_add_sub_avx2(&x1[23], &x1[20]); - btf_32_add_sub_avx2(&x1[22], &x1[21]); - btf_32_add_sub_avx2(&x1[24], &x1[27]); - btf_32_add_sub_avx2(&x1[25], &x1[26]); - btf_32_add_sub_avx2(&x1[31], &x1[28]); - btf_32_add_sub_avx2(&x1[30], &x1[29]); - btf_32_avx2_type0_new(cospi_m08, cospi_p56, &x1[34], &x1[61], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m08, cospi_p56, &x1[35], &x1[60], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m56, cospi_m08, &x1[36], &x1[59], _r, 
cos_bit); - btf_32_avx2_type0_new(cospi_m56, cospi_m08, &x1[37], &x1[58], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m40, cospi_p24, &x1[42], &x1[53], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m40, cospi_p24, &x1[43], &x1[52], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m24, cospi_m40, &x1[44], &x1[51], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m24, cospi_m40, &x1[45], &x1[50], _r, cos_bit); - - // stage 7 - btf_32_avx2_type1_new(cospi_p56, cospi_p08, &x1[4], &x1[7], _r, cos_bit); - btf_32_avx2_type1_new(cospi_p24, cospi_p40, &x1[5], &x1[6], _r, cos_bit); - btf_32_add_sub_avx2(&x1[8], &x1[9]); - btf_32_add_sub_avx2(&x1[11], &x1[10]); - btf_32_add_sub_avx2(&x1[12], &x1[13]); - btf_32_add_sub_avx2(&x1[15], &x1[14]); - btf_32_avx2_type0_new(cospi_m08, cospi_p56, &x1[17], &x1[30], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m56, cospi_m08, &x1[18], &x1[29], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m40, cospi_p24, &x1[21], &x1[26], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m24, cospi_m40, &x1[22], &x1[25], _r, cos_bit); - btf_32_add_sub_avx2(&x1[32], &x1[35]); - btf_32_add_sub_avx2(&x1[33], &x1[34]); - btf_32_add_sub_avx2(&x1[39], &x1[36]); - btf_32_add_sub_avx2(&x1[38], &x1[37]); - btf_32_add_sub_avx2(&x1[40], &x1[43]); - btf_32_add_sub_avx2(&x1[41], &x1[42]); - btf_32_add_sub_avx2(&x1[47], &x1[44]); - btf_32_add_sub_avx2(&x1[46], &x1[45]); - btf_32_add_sub_avx2(&x1[48], &x1[51]); - btf_32_add_sub_avx2(&x1[49], &x1[50]); - btf_32_add_sub_avx2(&x1[55], &x1[52]); - btf_32_add_sub_avx2(&x1[54], &x1[53]); - btf_32_add_sub_avx2(&x1[56], &x1[59]); - btf_32_add_sub_avx2(&x1[57], &x1[58]); - btf_32_add_sub_avx2(&x1[63], &x1[60]); - btf_32_add_sub_avx2(&x1[62], &x1[61]); - - // stage 8 - btf_32_avx2_type1_new(cospi_p60, cospi_p04, &x1[8], &x1[15], _r, cos_bit); - btf_32_avx2_type1_new(cospi_p28, cospi_p36, &x1[9], &x1[14], _r, cos_bit); - btf_32_avx2_type1_new(cospi_p44, cospi_p20, &x1[10], &x1[13], _r, cos_bit); - btf_32_avx2_type1_new(cospi_p12, cospi_p52, &x1[11], 
&x1[12], _r, cos_bit); - btf_32_add_sub_avx2(&x1[16], &x1[17]); - btf_32_add_sub_avx2(&x1[19], &x1[18]); - btf_32_add_sub_avx2(&x1[20], &x1[21]); - btf_32_add_sub_avx2(&x1[23], &x1[22]); - btf_32_add_sub_avx2(&x1[24], &x1[25]); - btf_32_add_sub_avx2(&x1[27], &x1[26]); - btf_32_add_sub_avx2(&x1[28], &x1[29]); - btf_32_add_sub_avx2(&x1[31], &x1[30]); - btf_32_avx2_type0_new(cospi_m04, cospi_p60, &x1[33], &x1[62], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m60, cospi_m04, &x1[34], &x1[61], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m36, cospi_p28, &x1[37], &x1[58], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m28, cospi_m36, &x1[38], &x1[57], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m20, cospi_p44, &x1[41], &x1[54], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m44, cospi_m20, &x1[42], &x1[53], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m52, cospi_p12, &x1[45], &x1[50], _r, cos_bit); - btf_32_avx2_type0_new(cospi_m12, cospi_m52, &x1[46], &x1[49], _r, cos_bit); - - // stage 9 - btf_32_avx2_type1_new(cospi_p62, cospi_p02, &x1[16], &x1[31], _r, cos_bit); - btf_32_avx2_type1_new(cospi_p30, cospi_p34, &x1[17], &x1[30], _r, cos_bit); - btf_32_avx2_type1_new(cospi_p46, cospi_p18, &x1[18], &x1[29], _r, cos_bit); - btf_32_avx2_type1_new(cospi_p14, cospi_p50, &x1[19], &x1[28], _r, cos_bit); - btf_32_avx2_type1_new(cospi_p54, cospi_p10, &x1[20], &x1[27], _r, cos_bit); - btf_32_avx2_type1_new(cospi_p22, cospi_p42, &x1[21], &x1[26], _r, cos_bit); - btf_32_avx2_type1_new(cospi_p38, cospi_p26, &x1[22], &x1[25], _r, cos_bit); - btf_32_avx2_type1_new(cospi_p06, cospi_p58, &x1[23], &x1[24], _r, cos_bit); - btf_32_add_sub_avx2(&x1[32], &x1[33]); - btf_32_add_sub_avx2(&x1[35], &x1[34]); - btf_32_add_sub_avx2(&x1[36], &x1[37]); - btf_32_add_sub_avx2(&x1[39], &x1[38]); - btf_32_add_sub_avx2(&x1[40], &x1[41]); - btf_32_add_sub_avx2(&x1[43], &x1[42]); - btf_32_add_sub_avx2(&x1[44], &x1[45]); - btf_32_add_sub_avx2(&x1[47], &x1[46]); - btf_32_add_sub_avx2(&x1[48], &x1[49]); - 
btf_32_add_sub_avx2(&x1[51], &x1[50]); - btf_32_add_sub_avx2(&x1[52], &x1[53]); - btf_32_add_sub_avx2(&x1[55], &x1[54]); - btf_32_add_sub_avx2(&x1[56], &x1[57]); - btf_32_add_sub_avx2(&x1[59], &x1[58]); - btf_32_add_sub_avx2(&x1[60], &x1[61]); - btf_32_add_sub_avx2(&x1[63], &x1[62]); - - // stage 10 - btf_32_avx2_type1_new(cospi_p63, cospi_p01, &x1[32], &x1[63], _r, cos_bit); - btf_32_avx2_type1_new(cospi_p31, cospi_p33, &x1[33], &x1[62], _r, cos_bit); - btf_32_avx2_type1_new(cospi_p47, cospi_p17, &x1[34], &x1[61], _r, cos_bit); - btf_32_avx2_type1_new(cospi_p15, cospi_p49, &x1[35], &x1[60], _r, cos_bit); - btf_32_avx2_type1_new(cospi_p55, cospi_p09, &x1[36], &x1[59], _r, cos_bit); - btf_32_avx2_type1_new(cospi_p23, cospi_p41, &x1[37], &x1[58], _r, cos_bit); - btf_32_avx2_type1_new(cospi_p39, cospi_p25, &x1[38], &x1[57], _r, cos_bit); - btf_32_avx2_type1_new(cospi_p07, cospi_p57, &x1[39], &x1[56], _r, cos_bit); - btf_32_avx2_type1_new(cospi_p59, cospi_p05, &x1[40], &x1[55], _r, cos_bit); - btf_32_avx2_type1_new(cospi_p27, cospi_p37, &x1[41], &x1[54], _r, cos_bit); - btf_32_avx2_type1_new(cospi_p43, cospi_p21, &x1[42], &x1[53], _r, cos_bit); - btf_32_avx2_type1_new(cospi_p11, cospi_p53, &x1[43], &x1[52], _r, cos_bit); - btf_32_avx2_type1_new(cospi_p51, cospi_p13, &x1[44], &x1[51], _r, cos_bit); - btf_32_avx2_type1_new(cospi_p19, cospi_p45, &x1[45], &x1[50], _r, cos_bit); - btf_32_avx2_type1_new(cospi_p35, cospi_p29, &x1[46], &x1[49], _r, cos_bit); - btf_32_avx2_type1_new(cospi_p03, cospi_p61, &x1[47], &x1[48], _r, cos_bit); - - // stage 11 - output[0] = x1[0]; - output[1] = x1[32]; - output[2] = x1[16]; - output[3] = x1[48]; - output[4] = x1[8]; - output[5] = x1[40]; - output[6] = x1[24]; - output[7] = x1[56]; - output[8] = x1[4]; - output[9] = x1[36]; - output[10] = x1[20]; - output[11] = x1[52]; - output[12] = x1[12]; - output[13] = x1[44]; - output[14] = x1[28]; - output[15] = x1[60]; - output[16] = x1[2]; - output[17] = x1[34]; - output[18] = x1[18]; - 
output[19] = x1[50]; - output[20] = x1[10]; - output[21] = x1[42]; - output[22] = x1[26]; - output[23] = x1[58]; - output[24] = x1[6]; - output[25] = x1[38]; - output[26] = x1[22]; - output[27] = x1[54]; - output[28] = x1[14]; - output[29] = x1[46]; - output[30] = x1[30]; - output[31] = x1[62]; - output[32] = x1[1]; - output[33] = x1[33]; - output[34] = x1[17]; - output[35] = x1[49]; - output[36] = x1[9]; - output[37] = x1[41]; - output[38] = x1[25]; - output[39] = x1[57]; - output[40] = x1[5]; - output[41] = x1[37]; - output[42] = x1[21]; - output[43] = x1[53]; - output[44] = x1[13]; - output[45] = x1[45]; - output[46] = x1[29]; - output[47] = x1[61]; - output[48] = x1[3]; - output[49] = x1[35]; - output[50] = x1[19]; - output[51] = x1[51]; - output[52] = x1[11]; - output[53] = x1[43]; - output[54] = x1[27]; - output[55] = x1[59]; - output[56] = x1[7]; - output[57] = x1[39]; - output[58] = x1[23]; - output[59] = x1[55]; - output[60] = x1[15]; - output[61] = x1[47]; - output[62] = x1[31]; - output[63] = x1[63]; -} - -static INLINE void fadst16x16_new_avx2(const __m256i *input, __m256i *output, - int8_t cos_bit) { - const int32_t *cospi = cospi_arr(cos_bit); - const __m256i __zero = _mm256_setzero_si256(); - const __m256i _r = _mm256_set1_epi32(1 << (cos_bit - 1)); - - __m256i cospi_p32_p32 = pair_set_w16_epi16(cospi[32], cospi[32]); - __m256i cospi_p32_m32 = pair_set_w16_epi16(cospi[32], -cospi[32]); - __m256i cospi_p16_p48 = pair_set_w16_epi16(cospi[16], cospi[48]); - __m256i cospi_p48_m16 = pair_set_w16_epi16(cospi[48], -cospi[16]); - __m256i cospi_m48_p16 = pair_set_w16_epi16(-cospi[48], cospi[16]); - __m256i cospi_p08_p56 = pair_set_w16_epi16(cospi[8], cospi[56]); - __m256i cospi_p56_m08 = pair_set_w16_epi16(cospi[56], -cospi[8]); - __m256i cospi_p40_p24 = pair_set_w16_epi16(cospi[40], cospi[24]); - __m256i cospi_p24_m40 = pair_set_w16_epi16(cospi[24], -cospi[40]); - __m256i cospi_m56_p08 = pair_set_w16_epi16(-cospi[56], cospi[8]); - __m256i cospi_m24_p40 = 
pair_set_w16_epi16(-cospi[24], cospi[40]); - __m256i cospi_p02_p62 = pair_set_w16_epi16(cospi[2], cospi[62]); - __m256i cospi_p62_m02 = pair_set_w16_epi16(cospi[62], -cospi[2]); - __m256i cospi_p10_p54 = pair_set_w16_epi16(cospi[10], cospi[54]); - __m256i cospi_p54_m10 = pair_set_w16_epi16(cospi[54], -cospi[10]); - __m256i cospi_p18_p46 = pair_set_w16_epi16(cospi[18], cospi[46]); - __m256i cospi_p46_m18 = pair_set_w16_epi16(cospi[46], -cospi[18]); - __m256i cospi_p26_p38 = pair_set_w16_epi16(cospi[26], cospi[38]); - __m256i cospi_p38_m26 = pair_set_w16_epi16(cospi[38], -cospi[26]); - __m256i cospi_p34_p30 = pair_set_w16_epi16(cospi[34], cospi[30]); - __m256i cospi_p30_m34 = pair_set_w16_epi16(cospi[30], -cospi[34]); - __m256i cospi_p42_p22 = pair_set_w16_epi16(cospi[42], cospi[22]); - __m256i cospi_p22_m42 = pair_set_w16_epi16(cospi[22], -cospi[42]); - __m256i cospi_p50_p14 = pair_set_w16_epi16(cospi[50], cospi[14]); - __m256i cospi_p14_m50 = pair_set_w16_epi16(cospi[14], -cospi[50]); - __m256i cospi_p58_p06 = pair_set_w16_epi16(cospi[58], cospi[6]); - __m256i cospi_p06_m58 = pair_set_w16_epi16(cospi[6], -cospi[58]); - - // stage 1 - __m256i x1[16]; - x1[0] = input[0]; - x1[1] = _mm256_subs_epi16(__zero, input[15]); - x1[2] = _mm256_subs_epi16(__zero, input[7]); - x1[3] = input[8]; - x1[4] = _mm256_subs_epi16(__zero, input[3]); - x1[5] = input[12]; - x1[6] = input[4]; - x1[7] = _mm256_subs_epi16(__zero, input[11]); - x1[8] = _mm256_subs_epi16(__zero, input[1]); - x1[9] = input[14]; - x1[10] = input[6]; - x1[11] = _mm256_subs_epi16(__zero, input[9]); - x1[12] = input[2]; - x1[13] = _mm256_subs_epi16(__zero, input[13]); - x1[14] = _mm256_subs_epi16(__zero, input[5]); - x1[15] = input[10]; - - // stage 2 - btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[2], &x1[3], _r, cos_bit); - btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[6], &x1[7], _r, cos_bit); - btf_16_w16_avx2(cospi_p32_p32, cospi_p32_m32, &x1[10], &x1[11], _r, cos_bit); - btf_16_w16_avx2(cospi_p32_p32, 
cospi_p32_m32, &x1[14], &x1[15], _r, cos_bit); - - // stage 3 - btf_16_adds_subs_avx2(&x1[0], &x1[2]); - btf_16_adds_subs_avx2(&x1[1], &x1[3]); - btf_16_adds_subs_avx2(&x1[4], &x1[6]); - btf_16_adds_subs_avx2(&x1[5], &x1[7]); - btf_16_adds_subs_avx2(&x1[8], &x1[10]); - btf_16_adds_subs_avx2(&x1[9], &x1[11]); - btf_16_adds_subs_avx2(&x1[12], &x1[14]); - btf_16_adds_subs_avx2(&x1[13], &x1[15]); - - // stage 4 - btf_16_w16_avx2(cospi_p16_p48, cospi_p48_m16, &x1[4], &x1[5], _r, cos_bit); - btf_16_w16_avx2(cospi_m48_p16, cospi_p16_p48, &x1[6], &x1[7], _r, cos_bit); - btf_16_w16_avx2(cospi_p16_p48, cospi_p48_m16, &x1[12], &x1[13], _r, cos_bit); - btf_16_w16_avx2(cospi_m48_p16, cospi_p16_p48, &x1[14], &x1[15], _r, cos_bit); - - // stage 5 - btf_16_adds_subs_avx2(&x1[0], &x1[4]); - btf_16_adds_subs_avx2(&x1[1], &x1[5]); - btf_16_adds_subs_avx2(&x1[2], &x1[6]); - btf_16_adds_subs_avx2(&x1[3], &x1[7]); - btf_16_adds_subs_avx2(&x1[8], &x1[12]); - btf_16_adds_subs_avx2(&x1[9], &x1[13]); - btf_16_adds_subs_avx2(&x1[10], &x1[14]); - btf_16_adds_subs_avx2(&x1[11], &x1[15]); - - // stage 6 - btf_16_w16_avx2(cospi_p08_p56, cospi_p56_m08, &x1[8], &x1[9], _r, cos_bit); - btf_16_w16_avx2(cospi_p40_p24, cospi_p24_m40, &x1[10], &x1[11], _r, cos_bit); - btf_16_w16_avx2(cospi_m56_p08, cospi_p08_p56, &x1[12], &x1[13], _r, cos_bit); - btf_16_w16_avx2(cospi_m24_p40, cospi_p40_p24, &x1[14], &x1[15], _r, cos_bit); - - // stage 7 - btf_16_adds_subs_avx2(&x1[0], &x1[8]); - btf_16_adds_subs_avx2(&x1[1], &x1[9]); - btf_16_adds_subs_avx2(&x1[2], &x1[10]); - btf_16_adds_subs_avx2(&x1[3], &x1[11]); - btf_16_adds_subs_avx2(&x1[4], &x1[12]); - btf_16_adds_subs_avx2(&x1[5], &x1[13]); - btf_16_adds_subs_avx2(&x1[6], &x1[14]); - btf_16_adds_subs_avx2(&x1[7], &x1[15]); - - // stage 8 - btf_16_w16_avx2(cospi_p02_p62, cospi_p62_m02, &x1[0], &x1[1], _r, cos_bit); - btf_16_w16_avx2(cospi_p10_p54, cospi_p54_m10, &x1[2], &x1[3], _r, cos_bit); - btf_16_w16_avx2(cospi_p18_p46, cospi_p46_m18, &x1[4], &x1[5], _r, 
cos_bit); - btf_16_w16_avx2(cospi_p26_p38, cospi_p38_m26, &x1[6], &x1[7], _r, cos_bit); - btf_16_w16_avx2(cospi_p34_p30, cospi_p30_m34, &x1[8], &x1[9], _r, cos_bit); - btf_16_w16_avx2(cospi_p42_p22, cospi_p22_m42, &x1[10], &x1[11], _r, cos_bit); - btf_16_w16_avx2(cospi_p50_p14, cospi_p14_m50, &x1[12], &x1[13], _r, cos_bit); - btf_16_w16_avx2(cospi_p58_p06, cospi_p06_m58, &x1[14], &x1[15], _r, cos_bit); - - // stage 9 - output[0] = x1[1]; - output[1] = x1[14]; - output[2] = x1[3]; - output[3] = x1[12]; - output[4] = x1[5]; - output[5] = x1[10]; - output[6] = x1[7]; - output[7] = x1[8]; - output[8] = x1[9]; - output[9] = x1[6]; - output[10] = x1[11]; - output[11] = x1[4]; - output[12] = x1[13]; - output[13] = x1[2]; - output[14] = x1[15]; - output[15] = x1[0]; -} - -static INLINE __m256i scale_round_avx2(const __m256i a, const int scale) { - const __m256i scale__r = pair_set_w16_epi16(scale, 1 << (NewSqrt2Bits - 1)); - const __m256i b = _mm256_madd_epi16(a, scale__r); - return _mm256_srai_epi32(b, NewSqrt2Bits); -} - -static INLINE void fidentity16x16_new_avx2(const __m256i *input, - __m256i *output, int8_t cos_bit) { - (void)cos_bit; - const __m256i one = _mm256_set1_epi16(1); - - for (int i = 0; i < 16; ++i) { - const __m256i a_lo = _mm256_unpacklo_epi16(input[i], one); - const __m256i a_hi = _mm256_unpackhi_epi16(input[i], one); - const __m256i b_lo = scale_round_avx2(a_lo, 2 * NewSqrt2); - const __m256i b_hi = scale_round_avx2(a_hi, 2 * NewSqrt2); - output[i] = _mm256_packs_epi32(b_lo, b_hi); - } -} - -static INLINE void fidentity16x32_new_avx2(const __m256i *input, - __m256i *output, int8_t cos_bit) { - (void)cos_bit; - for (int i = 0; i < 32; ++i) { - output[i] = _mm256_slli_epi16(input[i], 2); - } -} - -static INLINE void av1_round_shift_array_32_avx2(__m256i *input, - __m256i *output, - const int size, - const int bit) { - if (bit > 0) { - int i; - for (i = 0; i < size; i++) { - output[i] = av1_round_shift_32_avx2(input[i], bit); - } - } else { - int i; - for 
(i = 0; i < size; i++) { - output[i] = _mm256_slli_epi32(input[i], -bit); - } - } -} - -static INLINE void av1_round_shift_rect_array_32_avx2(__m256i *input, - __m256i *output, - const int size, - const int bit) { - const __m256i sqrt2 = _mm256_set1_epi32(NewSqrt2); - if (bit > 0) { - int i; - for (i = 0; i < size; i++) { - const __m256i r0 = av1_round_shift_32_avx2(input[i], bit); - const __m256i r1 = _mm256_mullo_epi32(sqrt2, r0); - output[i] = av1_round_shift_32_avx2(r1, NewSqrt2Bits); - } - } else { - int i; - for (i = 0; i < size; i++) { - const __m256i r0 = _mm256_slli_epi32(input[i], -bit); - const __m256i r1 = _mm256_mullo_epi32(sqrt2, r0); - output[i] = av1_round_shift_32_avx2(r1, NewSqrt2Bits); - } - } -} - -static INLINE void transpose_32_8x8_avx2(int stride, const __m256i *inputA, - __m256i *output) { - __m256i temp0 = _mm256_unpacklo_epi32(inputA[0], inputA[2]); - __m256i temp1 = _mm256_unpackhi_epi32(inputA[0], inputA[2]); - __m256i temp2 = _mm256_unpacklo_epi32(inputA[1], inputA[3]); - __m256i temp3 = _mm256_unpackhi_epi32(inputA[1], inputA[3]); - __m256i temp4 = _mm256_unpacklo_epi32(inputA[4], inputA[6]); - __m256i temp5 = _mm256_unpackhi_epi32(inputA[4], inputA[6]); - __m256i temp6 = _mm256_unpacklo_epi32(inputA[5], inputA[7]); - __m256i temp7 = _mm256_unpackhi_epi32(inputA[5], inputA[7]); - - __m256i t0 = _mm256_unpacklo_epi32(temp0, temp2); - __m256i t1 = _mm256_unpackhi_epi32(temp0, temp2); - __m256i t2 = _mm256_unpacklo_epi32(temp1, temp3); - __m256i t3 = _mm256_unpackhi_epi32(temp1, temp3); - __m256i t4 = _mm256_unpacklo_epi32(temp4, temp6); - __m256i t5 = _mm256_unpackhi_epi32(temp4, temp6); - __m256i t6 = _mm256_unpacklo_epi32(temp5, temp7); - __m256i t7 = _mm256_unpackhi_epi32(temp5, temp7); - - output[0 * stride] = _mm256_permute2x128_si256(t0, t4, 0x20); - output[1 * stride] = _mm256_permute2x128_si256(t1, t5, 0x20); - output[2 * stride] = _mm256_permute2x128_si256(t2, t6, 0x20); - output[3 * stride] = _mm256_permute2x128_si256(t3, t7, 
0x20); - output[4 * stride] = _mm256_permute2x128_si256(t0, t4, 0x31); - output[5 * stride] = _mm256_permute2x128_si256(t1, t5, 0x31); - output[6 * stride] = _mm256_permute2x128_si256(t2, t6, 0x31); - output[7 * stride] = _mm256_permute2x128_si256(t3, t7, 0x31); -} - -// Store 8 16 bit values. Sign extend the values. -static INLINE void store_buffer_16bit_to_32bit_w16_avx2(const __m256i *const in, - int32_t *out, - const int stride, - const int out_size) { - for (int i = 0; i < out_size; ++i) { - _mm256_store_si256((__m256i *)(out), - _mm256_cvtepi16_epi32(_mm256_castsi256_si128(in[i]))); - _mm256_store_si256( - (__m256i *)(out + 8), - _mm256_cvtepi16_epi32(_mm256_extracti128_si256(in[i], 1))); - out += stride; - } -} - -static INLINE void store_rect_16bit_to_32bit_avx2(const __m256i a, - int32_t *const b) { - const __m256i one = _mm256_set1_epi16(1); - const __m256i a_reoder = _mm256_permute4x64_epi64(a, 0xd8); - const __m256i a_lo = _mm256_unpacklo_epi16(a_reoder, one); - const __m256i a_hi = _mm256_unpackhi_epi16(a_reoder, one); - const __m256i b_lo = scale_round_avx2(a_lo, NewSqrt2); - const __m256i b_hi = scale_round_avx2(a_hi, NewSqrt2); - _mm256_store_si256((__m256i *)b, b_lo); - _mm256_store_si256((__m256i *)(b + 8), b_hi); -} - -static INLINE void store_rect_buffer_16bit_to_32bit_w16_avx2( - const __m256i *const in, int32_t *const out, const int stride, - const int out_size) { - for (int i = 0; i < out_size; ++i) { - store_rect_16bit_to_32bit_avx2(in[i], out + i * stride); - } -} - -static const transform_1d_avx2 col_txfm16x32_arr[TX_TYPES] = { - fdct16x32_new_avx2, // DCT_DCT - NULL, // ADST_DCT - NULL, // DCT_ADST - NULL, // ADST_ADST - NULL, // FLIPADST_DCT - NULL, // DCT_FLIPADST - NULL, // FLIPADST_FLIPADST - NULL, // ADST_FLIPADST - NULL, // FLIPADST_ADST - fidentity16x32_new_avx2, // IDTX - fdct16x32_new_avx2, // V_DCT - fidentity16x32_new_avx2, // H_DCT - NULL, // V_ADST - NULL, // H_ADST - NULL, // V_FLIPADST - NULL // H_FLIPADST -}; - -static 
const transform_1d_avx2 row_txfm16x32_arr[TX_TYPES] = { - fdct16x32_new_avx2, // DCT_DCT - NULL, // ADST_DCT - NULL, // DCT_ADST - NULL, // ADST_ADST - NULL, // FLIPADST_DCT - NULL, // DCT_FLIPADST - NULL, // FLIPADST_FLIPADST - NULL, // ADST_FLIPADST - NULL, // FLIPADST_ADST - fidentity16x32_new_avx2, // IDTX - fidentity16x32_new_avx2, // V_DCT - fdct16x32_new_avx2, // H_DCT - NULL, // V_ADST - NULL, // H_ADST - NULL, // V_FLIPADST - NULL // H_FLIPADST -}; - -static const transform_1d_avx2 col_txfm16x16_arr[TX_TYPES] = { - fdct16x16_new_avx2, // DCT_DCT - fadst16x16_new_avx2, // ADST_DCT - fdct16x16_new_avx2, // DCT_ADST - fadst16x16_new_avx2, // ADST_ADST - fadst16x16_new_avx2, // FLIPADST_DCT - fdct16x16_new_avx2, // DCT_FLIPADST - fadst16x16_new_avx2, // FLIPADST_FLIPADST - fadst16x16_new_avx2, // ADST_FLIPADST - fadst16x16_new_avx2, // FLIPADST_ADST - fidentity16x16_new_avx2, // IDTX - fdct16x16_new_avx2, // V_DCT - fidentity16x16_new_avx2, // H_DCT - fadst16x16_new_avx2, // V_ADST - fidentity16x16_new_avx2, // H_ADST - fadst16x16_new_avx2, // V_FLIPADST - fidentity16x16_new_avx2 // H_FLIPADST -}; - -static const transform_1d_avx2 row_txfm16x16_arr[TX_TYPES] = { - fdct16x16_new_avx2, // DCT_DCT - fdct16x16_new_avx2, // ADST_DCT - fadst16x16_new_avx2, // DCT_ADST - fadst16x16_new_avx2, // ADST_ADST - fdct16x16_new_avx2, // FLIPADST_DCT - fadst16x16_new_avx2, // DCT_FLIPADST - fadst16x16_new_avx2, // FLIPADST_FLIPADST - fadst16x16_new_avx2, // ADST_FLIPADST - fadst16x16_new_avx2, // FLIPADST_ADST - fidentity16x16_new_avx2, // IDTX - fidentity16x16_new_avx2, // V_DCT - fdct16x16_new_avx2, // H_DCT - fidentity16x16_new_avx2, // V_ADST - fadst16x16_new_avx2, // H_ADST - fidentity16x16_new_avx2, // V_FLIPADST - fadst16x16_new_avx2 // H_FLIPADST -}; - -static void lowbd_fwd_txfm2d_16x16_avx2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd) { - (void)bd; - const TX_SIZE tx_size = TX_16X16; - __m256i buf0[16], buf1[16]; - const int8_t 
*shift = fwd_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx]; - const int width = tx_size_wide[tx_size]; - const int height = tx_size_high[tx_size]; - const transform_1d_avx2 col_txfm = col_txfm16x16_arr[tx_type]; - const transform_1d_avx2 row_txfm = row_txfm16x16_arr[tx_type]; - int ud_flip, lr_flip; - - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - const int32_t i = 0; - if (ud_flip) { - load_buffer_16bit_to_16bit_flip_avx2(input + 16 * i, stride, buf0, height); - } else { - load_buffer_16bit_to_16bit_avx2(input + 16 * i, stride, buf0, height); - } - round_shift_16bit_w16_avx2(buf0, height, shift[0]); - col_txfm(buf0, buf0, cos_bit_col); - round_shift_16bit_w16_avx2(buf0, height, shift[1]); - transpose_16bit_16x16_avx2(buf0, buf1 + 0 * width + 16 * i); - - __m256i *buf; - if (lr_flip) { - buf = buf0; - flip_buf_avx2(buf1 + width * i, buf, width); - } else { - buf = buf1 + width * i; - } - row_txfm(buf, buf, cos_bit_row); - round_shift_16bit_w16_avx2(buf, width, shift[2]); - transpose_16bit_16x16_avx2(buf, buf); - store_buffer_16bit_to_32bit_w16_avx2(buf, output + 16 * width * i, width, 16); -} - -static void lowbd_fwd_txfm2d_32x32_avx2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd) { - (void)bd; - const TX_SIZE tx_size = TX_32X32; - __m256i buf0[32], buf1[128]; - const int8_t *shift = fwd_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx]; - const int width = tx_size_wide[tx_size]; - const int height = tx_size_high[tx_size]; - const transform_1d_avx2 col_txfm = col_txfm16x32_arr[tx_type]; - const transform_1d_avx2 row_txfm = row_txfm16x32_arr[tx_type]; - - int ud_flip, 
lr_flip; - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - - for (int i = 0; i < 2; i++) { - if (ud_flip) { - load_buffer_16bit_to_16bit_flip_avx2(input + 16 * i, stride, buf0, - height); - } else { - load_buffer_16bit_to_16bit_avx2(input + 16 * i, stride, buf0, height); - } - round_shift_16bit_w16_avx2(buf0, height, shift[0]); - col_txfm(buf0, buf0, cos_bit_col); - round_shift_16bit_w16_avx2(buf0, height, shift[1]); - transpose_16bit_16x16_avx2(buf0 + 0 * 16, buf1 + 0 * width + 16 * i); - transpose_16bit_16x16_avx2(buf0 + 1 * 16, buf1 + 1 * width + 16 * i); - } - - for (int i = 0; i < 2; i++) { - __m256i *buf; - if (lr_flip) { - buf = buf0; - flip_buf_avx2(buf1 + width * i, buf, width); - } else { - buf = buf1 + width * i; - } - row_txfm(buf, buf, cos_bit_row); - round_shift_16bit_w16_avx2(buf, width, shift[2]); - transpose_16bit_16x16_avx2(buf, buf); - store_buffer_16bit_to_32bit_w16_avx2(buf, output + 16 * width * i, width, - 16); - transpose_16bit_16x16_avx2(buf + 16, buf + 16); - store_buffer_16bit_to_32bit_w16_avx2(buf + 16, output + 16 * width * i + 16, - width, 16); - } -} - -static void lowbd_fwd_txfm2d_64x64_avx2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd) { - (void)bd; - (void)tx_type; - assert(tx_type == DCT_DCT); - const TX_SIZE tx_size = TX_64X64; - __m256i buf0[64], buf1[256]; - const int8_t *shift = fwd_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx]; - const int width = tx_size_wide[tx_size]; - const int height = tx_size_high[tx_size]; - const transform_1d_avx2 col_txfm = fdct16x64_new_avx2; - const int width_div16 = (width >> 4); - const int height_div16 = (height >> 4); - - for (int i = 0; i < width_div16; i++) { - load_buffer_16bit_to_16bit_avx2(input + 16 * i, stride, buf0, height); - round_shift_16bit_w16_avx2(buf0, height, 
shift[0]); - col_txfm(buf0, buf0, cos_bit_col); - round_shift_16bit_w16_avx2(buf0, height, shift[1]); - for (int j = 0; j < AOMMIN(2, height_div16); ++j) { - transpose_16bit_16x16_avx2(buf0 + j * 16, buf1 + j * width + 16 * i); - } - } - - for (int i = 0; i < AOMMIN(2, height_div16); i++) { - __m256i bufA[64]; - __m256i bufB[64]; - __m128i *buf = (__m128i *)(buf1 + width * i); - for (int j = 0; j < width; ++j) { - bufA[j] = _mm256_cvtepi16_epi32(buf[j * 2]); - bufB[j] = _mm256_cvtepi16_epi32(buf[j * 2 + 1]); - } - av1_fdct64_new_avx2(bufA, bufA, cos_bit_row); - av1_fdct64_new_avx2(bufB, bufB, cos_bit_row); - av1_round_shift_array_32_avx2(bufA, bufA, 32, -shift[2]); - av1_round_shift_array_32_avx2(bufB, bufB, 32, -shift[2]); - - int32_t *output8 = output + 16 * 32 * i; - for (int j = 0; j < 4; ++j) { - __m256i *out = (__m256i *)(output8 + 8 * j); - transpose_32_8x8_avx2(4, bufA + 8 * j, out); - transpose_32_8x8_avx2(4, bufB + 8 * j, out + 8 * 4); - } - } -} - -static void lowbd_fwd_txfm2d_16x32_avx2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd) { - (void)bd; - const TX_SIZE tx_size = TX_16X32; - __m256i buf0[32], buf1[32]; - const int8_t *shift = fwd_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx]; - const int width = tx_size_wide[tx_size]; - const int height = tx_size_high[tx_size]; - const transform_1d_avx2 col_txfm = col_txfm16x32_arr[tx_type]; - const transform_1d_avx2 row_txfm = row_txfm16x16_arr[tx_type]; - - int ud_flip, lr_flip; - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - - if (ud_flip) { - load_buffer_16bit_to_16bit_flip_avx2(input, stride, buf0, height); - } else { - load_buffer_16bit_to_16bit_avx2(input, stride, buf0, height); - } - round_shift_16bit_w16_avx2(buf0, height, shift[0]); - col_txfm(buf0, buf0, cos_bit_col); - 
round_shift_16bit_w16_avx2(buf0, height, shift[1]); - transpose_16bit_16x16_avx2(buf0, buf1); - transpose_16bit_16x16_avx2(buf0 + 16, buf1 + 16); - - for (int i = 0; i < 2; i++) { - __m256i *buf; - if (lr_flip) { - buf = buf0; - flip_buf_avx2(buf1 + width * i, buf, width); - } else { - buf = buf1 + width * i; - } - row_txfm(buf, buf, cos_bit_row); - round_shift_16bit_w16_avx2(buf, width, shift[2]); - transpose_16bit_16x16_avx2(buf, buf); - store_rect_buffer_16bit_to_32bit_w16_avx2(buf, output + 16 * width * i, - width, 16); - } -} - -static void lowbd_fwd_txfm2d_32x16_avx2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd) { - (void)bd; - __m256i buf0[32], buf1[64]; - const int8_t *shift = fwd_txfm_shift_ls[TX_32X16]; - const int txw_idx = get_txw_idx(TX_32X16); - const int txh_idx = get_txh_idx(TX_32X16); - const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx]; - const int width = 32; - const int height = 16; - const transform_1d_avx2 col_txfm = col_txfm16x16_arr[tx_type]; - const transform_1d_avx2 row_txfm = row_txfm16x32_arr[tx_type]; - - int ud_flip, lr_flip; - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - - for (int i = 0; i < 2; i++) { - if (ud_flip) { - load_buffer_16bit_to_16bit_flip_avx2(input + 16 * i, stride, buf0, - height); - } else { - load_buffer_16bit_to_16bit_avx2(input + 16 * i, stride, buf0, height); - } - round_shift_16bit_w16_avx2(buf0, height, shift[0]); - col_txfm(buf0, buf0, cos_bit_col); - round_shift_16bit_w16_avx2(buf0, height, shift[1]); - transpose_16bit_16x16_avx2(buf0, buf1 + 0 * width + 16 * i); - } - - __m256i *buf; - if (lr_flip) { - buf = buf0; - flip_buf_avx2(buf1, buf, width); - } else { - buf = buf1; - } - row_txfm(buf, buf, cos_bit_row); - round_shift_16bit_w16_avx2(buf, width, shift[2]); - transpose_16bit_16x16_avx2(buf, buf); - store_rect_buffer_16bit_to_32bit_w16_avx2(buf, output, width, 16); - - transpose_16bit_16x16_avx2(buf + 16, 
buf + 16); - store_rect_buffer_16bit_to_32bit_w16_avx2(buf + 16, output + 16, width, 16); -} - -static void lowbd_fwd_txfm2d_64x32_avx2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd) { - (void)bd; - const TX_SIZE tx_size = TX_64X32; - __m256i buf0[64], buf1[256]; - const int8_t *shift = fwd_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx]; - const int width = tx_size_wide[tx_size]; - const int height = tx_size_high[tx_size]; - const transform_1d_avx2 col_txfm = col_txfm16x32_arr[tx_type]; - const int width_div16 = (width >> 4); - const int height_div16 = (height >> 4); - - for (int i = 0; i < width_div16; i++) { - load_buffer_16bit_to_16bit_avx2(input + 16 * i, stride, buf0, height); - round_shift_16bit_w16_avx2(buf0, height, shift[0]); - col_txfm(buf0, buf0, cos_bit_col); - round_shift_16bit_w16_avx2(buf0, height, shift[1]); - for (int j = 0; j < AOMMIN(4, height_div16); ++j) { - transpose_16bit_16x16_avx2(buf0 + j * 16, buf1 + j * width + 16 * i); - } - } - assert(tx_type == DCT_DCT); - for (int i = 0; i < AOMMIN(2, height_div16); i++) { - __m256i bufA[64]; - __m256i bufB[64]; - __m128i *buf = (__m128i *)(buf1 + width * i); - for (int j = 0; j < width; ++j) { - bufA[j] = _mm256_cvtepi16_epi32(buf[j * 2]); - bufB[j] = _mm256_cvtepi16_epi32(buf[j * 2 + 1]); - } - av1_fdct64_new_avx2(bufA, bufA, cos_bit_row); - av1_fdct64_new_avx2(bufB, bufB, cos_bit_row); - av1_round_shift_rect_array_32_avx2(bufA, bufA, 32, -shift[2]); - av1_round_shift_rect_array_32_avx2(bufB, bufB, 32, -shift[2]); - - int32_t *output8 = output + 16 * 32 * i; - for (int j = 0; j < 4; ++j) { - __m256i *out = (__m256i *)(output8 + 8 * j); - transpose_32_8x8_avx2(4, bufA + 8 * j, out); - transpose_32_8x8_avx2(4, bufB + 8 * j, out + 8 * 4); - } - } -} - -static void 
lowbd_fwd_txfm2d_32x64_avx2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd) { - (void)bd; - (void)tx_type; - assert(tx_type == DCT_DCT); - const TX_SIZE tx_size = TX_32X64; - __m256i buf0[64], buf1[256]; - const int8_t *shift = fwd_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx]; - const int width = tx_size_wide[tx_size]; - const int height = tx_size_high[tx_size]; - const transform_1d_avx2 col_txfm = fdct16x64_new_avx2; - const int width_div16 = (width >> 4); - const int height_div16 = (height >> 4); - - for (int i = 0; i < width_div16; i++) { - load_buffer_16bit_to_16bit_avx2(input + 16 * i, stride, buf0, height); - round_shift_16bit_w16_avx2(buf0, height, shift[0]); - col_txfm(buf0, buf0, cos_bit_col); - round_shift_16bit_w16_avx2(buf0, height, shift[1]); - for (int j = 0; j < AOMMIN(2, height_div16); ++j) { - transpose_16bit_16x16_avx2(buf0 + j * 16, buf1 + j * width + 16 * i); - } - } - - for (int i = 0; i < AOMMIN(2, height_div16); i++) { - __m256i bufA[32]; - __m256i bufB[32]; - __m128i *buf = (__m128i *)(buf1 + width * i); - for (int j = 0; j < width; ++j) { - bufA[j] = _mm256_cvtepi16_epi32(buf[j * 2]); - bufB[j] = _mm256_cvtepi16_epi32(buf[j * 2 + 1]); - } - av1_fdct32_new_avx2(bufA, bufA, cos_bit_row); - av1_fdct32_new_avx2(bufB, bufB, cos_bit_row); - av1_round_shift_rect_array_32_avx2(bufA, bufA, 32, -shift[2]); - av1_round_shift_rect_array_32_avx2(bufB, bufB, 32, -shift[2]); - - int32_t *output8 = output + 16 * 32 * i; - for (int j = 0; j < 4; ++j) { - __m256i *out = (__m256i *)(output8 + 8 * j); - transpose_32_8x8_avx2(4, bufA + 8 * j, out); - transpose_32_8x8_avx2(4, bufB + 8 * j, out + 8 * 4); - } - } -} - -static void lowbd_fwd_txfm2d_16x64_avx2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd) { - (void)bd; - 
(void)tx_type; - assert(tx_type == DCT_DCT); - const TX_SIZE tx_size = TX_16X64; - __m256i buf0[64], buf1[64]; - const int8_t *shift = fwd_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx]; - const int width = tx_size_wide[tx_size]; - const int height = tx_size_high[tx_size]; - const transform_1d_avx2 col_txfm = fdct16x64_new_avx2; - const transform_1d_avx2 row_txfm = fdct16x16_new_avx2; - const int width_div16 = (width >> 4); - const int height_div16 = (height >> 4); - - for (int i = 0; i < width_div16; i++) { - load_buffer_16bit_to_16bit_avx2(input + 16 * i, stride, buf0, height); - round_shift_16bit_w16_avx2(buf0, height, shift[0]); - col_txfm(buf0, buf0, cos_bit_col); - round_shift_16bit_w16_avx2(buf0, height, shift[1]); - for (int j = 0; j < height_div16; ++j) { - transpose_16bit_16x16_avx2(buf0 + j * 16, buf1 + j * width + 16 * i); - } - } - - for (int i = 0; i < AOMMIN(4, height_div16); i++) { - __m256i *buf = buf1 + width * i; - row_txfm(buf, buf, cos_bit_row); - round_shift_16bit_w16_avx2(buf, width, shift[2]); - int32_t *output16 = output + 16 * width * i; - for (int j = 0; j < width_div16; ++j) { - __m256i *buf16 = buf + 16 * j; - transpose_16bit_16x16_avx2(buf16, buf16); - store_buffer_16bit_to_32bit_w16_avx2(buf16, output16 + 16 * j, width, 16); - } - } - // Zero out the bottom 16x32 area. 
- memset(output + 16 * 32, 0, 16 * 32 * sizeof(*output)); -} - -static void lowbd_fwd_txfm2d_64x16_avx2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd) { - (void)bd; - (void)tx_type; - assert(tx_type == DCT_DCT); - const TX_SIZE tx_size = TX_64X16; - __m256i buf0[64], buf1[64]; - const int8_t *shift = fwd_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx]; - const int width = tx_size_wide[tx_size]; - const int height = tx_size_high[tx_size]; - const transform_1d_avx2 col_txfm = fdct16x16_new_avx2; - const transform_1d_avx2 row_txfm = fdct16x64_new_avx2; - const int width_div16 = (width >> 4); - const int height_div16 = (height >> 4); - - for (int i = 0; i < width_div16; i++) { - load_buffer_16bit_to_16bit_avx2(input + 16 * i, stride, buf0, height); - round_shift_16bit_w16_avx2(buf0, height, shift[0]); - col_txfm(buf0, buf0, cos_bit_col); - round_shift_16bit_w16_avx2(buf0, height, shift[1]); - for (int j = 0; j < height_div16; ++j) { - transpose_16bit_16x16_avx2(buf0 + j * 16, buf1 + j * width + 16 * i); - } - } - - for (int i = 0; i < height_div16; i++) { - __m256i *buf = buf1 + width * i; - row_txfm(buf, buf, cos_bit_row); - round_shift_16bit_w16_avx2(buf, width, shift[2]); - int32_t *output16 = output + 16 * 32 * i; - for (int j = 0; j < 2; ++j) { - __m256i *buf16 = buf + 16 * j; - transpose_16bit_16x16_avx2(buf16, buf16); - store_buffer_16bit_to_32bit_w16_avx2(buf16, output16 + 16 * j, 32, 16); - } - } -} - -static FwdTxfm2dFunc fwd_txfm2d_func_ls[TX_SIZES_ALL] = { - av1_lowbd_fwd_txfm2d_4x4_sse2, // 4x4 transform - av1_lowbd_fwd_txfm2d_8x8_sse2, // 8x8 transform - lowbd_fwd_txfm2d_16x16_avx2, // 16x16 transform - lowbd_fwd_txfm2d_32x32_avx2, // 32x32 transform - lowbd_fwd_txfm2d_64x64_avx2, // 64x64 transform - av1_lowbd_fwd_txfm2d_4x8_sse2, // 4x8 
transform - av1_lowbd_fwd_txfm2d_8x4_sse2, // 8x4 transform - av1_lowbd_fwd_txfm2d_8x16_sse2, // 8x16 transform - av1_lowbd_fwd_txfm2d_16x8_sse2, // 16x8 transform - lowbd_fwd_txfm2d_16x32_avx2, // 16x32 transform - lowbd_fwd_txfm2d_32x16_avx2, // 32x16 transform - lowbd_fwd_txfm2d_32x64_avx2, // 32x64 transform - lowbd_fwd_txfm2d_64x32_avx2, // 64x32 transform - av1_lowbd_fwd_txfm2d_4x16_sse2, // 4x16 transform - av1_lowbd_fwd_txfm2d_16x4_sse2, // 16x4 transform - av1_lowbd_fwd_txfm2d_8x32_sse2, // 8x32 transform - av1_lowbd_fwd_txfm2d_32x8_sse2, // 32x8 transform - lowbd_fwd_txfm2d_16x64_avx2, // 16x64 transform - lowbd_fwd_txfm2d_64x16_avx2, // 64x16 transform -}; - -void av1_lowbd_fwd_txfm_avx2(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TxfmParam *txfm_param) { - FwdTxfm2dFunc fwd_txfm2d_func = fwd_txfm2d_func_ls[txfm_param->tx_size]; - if ((fwd_txfm2d_func == NULL) || - (txfm_param->lossless && txfm_param->tx_size == TX_4X4)) { - av1_lowbd_fwd_txfm_c(src_diff, coeff, diff_stride, txfm_param); - } else { - fwd_txfm2d_func(src_diff, coeff, diff_stride, txfm_param->tx_type, - txfm_param->bd); - } -} diff --git a/third_party/aom/av1/encoder/x86/av1_fwd_txfm2d_sse4.c b/third_party/aom/av1/encoder/x86/av1_fwd_txfm2d_sse4.c deleted file mode 100644 index 8ec0256eb..000000000 --- a/third_party/aom/av1/encoder/x86/av1_fwd_txfm2d_sse4.c +++ /dev/null @@ -1,365 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include "config/av1_rtcd.h" - -#include "av1/common/enums.h" -#include "av1/common/av1_txfm.h" -#include "av1/common/x86/av1_txfm_sse2.h" -#include "av1/common/x86/highbd_txfm_utility_sse4.h" -#include "av1/encoder/av1_fwd_txfm1d_cfg.h" -#include "av1/encoder/x86/av1_txfm1d_sse4.h" -#include "av1/encoder/x86/av1_fwd_txfm_sse2.h" - -static INLINE void int16_array_with_stride_to_int32_array_without_stride( - const int16_t *input, int stride, int32_t *output, int txfm1d_size) { - int r, c; - for (r = 0; r < txfm1d_size; r++) { - for (c = 0; c < txfm1d_size; c++) { - output[r * txfm1d_size + c] = (int32_t)input[r * stride + c]; - } - } -} - -typedef void (*TxfmFuncSSE2)(const __m128i *input, __m128i *output, - const int8_t cos_bit, const int8_t *stage_range); - -static void fdct32_new_sse4_1(const __m128i *input, __m128i *output, - const int8_t cos_bit, const int8_t *stage_range) { - const int txfm_size = 32; - const int num_per_128 = 4; - __m128i buf0[32]; - __m128i buf1[32]; - int col_num = txfm_size / num_per_128; - int col; - (void)stage_range; - for (col = 0; col < col_num; col++) { - int j; - for (j = 0; j < 32; ++j) { - buf0[j] = input[j * col_num + col]; - } - av1_fdct32_new_sse4_1(buf0, buf1, cos_bit); - for (j = 0; j < 32; ++j) { - output[j * col_num + col] = buf1[j]; - } - } -} - -static void fdct64_new_sse4_1(const __m128i *input, __m128i *output, - const int8_t cos_bit, const int8_t *stage_range) { - const int txfm_size = 64; - const int num_per_128 = 4; - int col_num = txfm_size / num_per_128; - (void)stage_range; - for (int col = 0; col < col_num; col++) { - av1_fdct64_new_sse4_1((input + col), (output + col), cos_bit, col_num, - col_num); - } -} - -static INLINE TxfmFuncSSE2 fwd_txfm_type_to_func(TXFM_TYPE txfm_type) { - switch (txfm_type) { - case TXFM_TYPE_DCT32: return fdct32_new_sse4_1; break; - case TXFM_TYPE_DCT64: return fdct64_new_sse4_1; break; - default: assert(0); - } - return NULL; -} - -static INLINE void fwd_txfm2d_sse4_1(const 
int16_t *input, int32_t *output, - const int stride, - const TXFM_2D_FLIP_CFG *cfg, - int32_t *txfm_buf) { - // TODO(sarahparker) This does not currently support rectangular transforms - // and will break without splitting txfm_size out into row and col size. - // Rectangular transforms use c code only, so it should be ok for now. - // It will be corrected when there are sse implementations for rectangular - // transforms. - assert(cfg->tx_size < TX_SIZES); - const int txfm_size = tx_size_wide[cfg->tx_size]; - const int8_t *shift = cfg->shift; - const int8_t *stage_range_col = cfg->stage_range_col; - const int8_t *stage_range_row = cfg->stage_range_row; - const int8_t cos_bit_col = cfg->cos_bit_col; - const int8_t cos_bit_row = cfg->cos_bit_row; - const TxfmFuncSSE2 txfm_func_col = fwd_txfm_type_to_func(cfg->txfm_type_col); - const TxfmFuncSSE2 txfm_func_row = fwd_txfm_type_to_func(cfg->txfm_type_row); - - __m128i *buf_128 = (__m128i *)txfm_buf; - __m128i *out_128 = (__m128i *)output; - int num_per_128 = 4; - int txfm2d_size_128 = txfm_size * txfm_size / num_per_128; - - int16_array_with_stride_to_int32_array_without_stride(input, stride, txfm_buf, - txfm_size); - av1_round_shift_array_32_sse4_1(buf_128, out_128, txfm2d_size_128, -shift[0]); - txfm_func_col(out_128, buf_128, cos_bit_col, stage_range_col); - av1_round_shift_array_32_sse4_1(buf_128, out_128, txfm2d_size_128, -shift[1]); - transpose_32(txfm_size, out_128, buf_128); - txfm_func_row(buf_128, out_128, cos_bit_row, stage_range_row); - av1_round_shift_array_32_sse4_1(out_128, buf_128, txfm2d_size_128, -shift[2]); - transpose_32(txfm_size, buf_128, out_128); -} - -static INLINE void fwd_txfm2d_64x64_sse4_1(const int16_t *input, - int32_t *output, const int stride, - const TXFM_2D_FLIP_CFG *cfg, - int32_t *txfm_buf) { - assert(cfg->tx_size < TX_SIZES); - const int txfm_size = tx_size_wide[cfg->tx_size]; - const int8_t *shift = cfg->shift; - const int8_t *stage_range_col = cfg->stage_range_col; - const int8_t 
cos_bit_col = cfg->cos_bit_col; - const int8_t cos_bit_row = cfg->cos_bit_row; - const TxfmFuncSSE2 txfm_func_col = fwd_txfm_type_to_func(cfg->txfm_type_col); - __m128i *buf_128 = (__m128i *)txfm_buf; - __m128i *out_128 = (__m128i *)output; - - const int num_per_128 = 4; - int txfm2d_size_128 = txfm_size * txfm_size / num_per_128; - int col_num = txfm_size / num_per_128; - - int16_array_with_stride_to_int32_array_without_stride(input, stride, output, - txfm_size); - /*col wise transform*/ - txfm_func_col(out_128, buf_128, cos_bit_col, stage_range_col); - av1_round_shift_array_32_sse4_1(buf_128, out_128, txfm2d_size_128, -shift[1]); - transpose_32(txfm_size, out_128, buf_128); - - /*row wise transform*/ - for (int col = 0; col < (col_num >> 1); col++) { - av1_fdct64_new_sse4_1((buf_128 + col), (out_128 + col), cos_bit_row, - col_num, (col_num >> 1)); - } - - txfm2d_size_128 = (col_num >> 1) * (txfm_size >> 1); - av1_round_shift_array_32_sse4_1(out_128, buf_128, txfm2d_size_128, -shift[2]); - transpose_32x32(buf_128, out_128); -} - -void av1_fwd_txfm2d_32x32_sse4_1(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd) { - DECLARE_ALIGNED(16, int32_t, txfm_buf[1024]); - TXFM_2D_FLIP_CFG cfg; - av1_get_fwd_txfm_cfg(tx_type, TX_32X32, &cfg); - (void)bd; - fwd_txfm2d_sse4_1(input, output, stride, &cfg, txfm_buf); -} - -void av1_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd) { - DECLARE_ALIGNED(16, int32_t, txfm_buf[4096]); - TXFM_2D_FLIP_CFG cfg; - av1_get_fwd_txfm_cfg(tx_type, TX_64X64, &cfg); - (void)bd; - fwd_txfm2d_64x64_sse4_1(input, output, stride, &cfg, txfm_buf); -} - -static INLINE void transpose_32_4x4x2(int stride, const __m128i *inputA, - const __m128i *inputB, __m128i *output) { - __m128i temp0 = _mm_unpacklo_epi32(inputA[0], inputA[2]); - __m128i temp1 = _mm_unpackhi_epi32(inputA[0], inputA[2]); - __m128i temp2 = _mm_unpacklo_epi32(inputA[1], inputA[3]); - __m128i temp3 = 
_mm_unpackhi_epi32(inputA[1], inputA[3]); - - output[0 * stride] = _mm_unpacklo_epi32(temp0, temp2); - output[1 * stride] = _mm_unpackhi_epi32(temp0, temp2); - output[2 * stride] = _mm_unpacklo_epi32(temp1, temp3); - output[3 * stride] = _mm_unpackhi_epi32(temp1, temp3); - - temp0 = _mm_unpacklo_epi32(inputB[0], inputB[2]); - temp1 = _mm_unpackhi_epi32(inputB[0], inputB[2]); - temp2 = _mm_unpacklo_epi32(inputB[1], inputB[3]); - temp3 = _mm_unpackhi_epi32(inputB[1], inputB[3]); - - output[4 * stride] = _mm_unpacklo_epi32(temp0, temp2); - output[5 * stride] = _mm_unpackhi_epi32(temp0, temp2); - output[6 * stride] = _mm_unpacklo_epi32(temp1, temp3); - output[7 * stride] = _mm_unpackhi_epi32(temp1, temp3); -} - -static void lowbd_fwd_txfm2d_64x64_sse4_1(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd) { - (void)bd; - (void)tx_type; - assert(tx_type == DCT_DCT); - const TX_SIZE tx_size = TX_64X64; - __m128i buf0[64], buf1[512]; - const int8_t *shift = fwd_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx]; - const int width = tx_size_wide[tx_size]; - const int height = tx_size_high[tx_size]; - const transform_1d_sse2 col_txfm = fdct8x64_new_sse2; - const int width_div8 = (width >> 3); - const int height_div8 = (height >> 3); - - for (int i = 0; i < width_div8; i++) { - load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height); - round_shift_16bit(buf0, height, shift[0]); - col_txfm(buf0, buf0, cos_bit_col); - round_shift_16bit(buf0, height, shift[1]); - for (int j = 0; j < AOMMIN(4, height_div8); ++j) { - transpose_16bit_8x8(buf0 + j * 8, buf1 + j * width + 8 * i); - } - } - for (int i = 0; i < AOMMIN(4, height_div8); i++) { - __m128i bufA[64]; - __m128i bufB[64]; - __m128i *buf = buf1 + width * i; - for (int j = 0; j < width; ++j) { - bufA[j] = 
_mm_cvtepi16_epi32(buf[j]); - bufB[j] = _mm_cvtepi16_epi32(_mm_unpackhi_epi64(buf[j], buf[j])); - } - av1_fdct64_new_sse4_1(bufA, bufA, cos_bit_row, 1, 1); - av1_fdct64_new_sse4_1(bufB, bufB, cos_bit_row, 1, 1); - av1_round_shift_array_32_sse4_1(bufA, bufA, 32, -shift[2]); - av1_round_shift_array_32_sse4_1(bufB, bufB, 32, -shift[2]); - - int32_t *output8 = output + 8 * 32 * i; - for (int j = 0; j < width_div8; ++j) { - __m128i *out = (__m128i *)(output8 + 4 * j); - transpose_32_4x4x2(8, bufA + 4 * j, bufB + 4 * j, out); - } - } -} - -static void lowbd_fwd_txfm2d_64x32_sse4_1(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd) { - (void)bd; - const TX_SIZE tx_size = TX_64X32; - __m128i buf0[64], buf1[256]; - const int8_t *shift = fwd_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx]; - const int width = tx_size_wide[tx_size]; - const int height = tx_size_high[tx_size]; - const transform_1d_sse2 col_txfm = col_txfm8x32_arr[tx_type]; - const int width_div8 = (width >> 3); - const int height_div8 = (height >> 3); - - for (int i = 0; i < width_div8; i++) { - load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height); - round_shift_16bit(buf0, height, shift[0]); - col_txfm(buf0, buf0, cos_bit_col); - round_shift_16bit(buf0, height, shift[1]); - for (int j = 0; j < AOMMIN(4, height_div8); ++j) { - transpose_16bit_8x8(buf0 + j * 8, buf1 + j * width + 8 * i); - } - } - assert(tx_type == DCT_DCT); - for (int i = 0; i < AOMMIN(4, height_div8); i++) { - __m128i bufA[64]; - __m128i bufB[64]; - __m128i *buf = buf1 + width * i; - for (int j = 0; j < width; ++j) { - bufA[j] = _mm_cvtepi16_epi32(buf[j]); - bufB[j] = _mm_cvtepi16_epi32(_mm_unpackhi_epi64(buf[j], buf[j])); - } - av1_fdct64_new_sse4_1(bufA, bufA, cos_bit_row, 1, 1); - av1_fdct64_new_sse4_1(bufB, bufB, 
cos_bit_row, 1, 1); - av1_round_shift_rect_array_32_sse4_1(bufA, bufA, 32, -shift[2], NewSqrt2); - av1_round_shift_rect_array_32_sse4_1(bufB, bufB, 32, -shift[2], NewSqrt2); - - int32_t *output8 = output + 8 * 32 * i; - for (int j = 0; j < width_div8; ++j) { - __m128i *out = (__m128i *)(output8 + 4 * j); - transpose_32_4x4x2(8, bufA + 4 * j, bufB + 4 * j, out); - } - } -} - -static void lowbd_fwd_txfm2d_32x64_sse4_1(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd) { - (void)bd; - (void)tx_type; - assert(tx_type == DCT_DCT); - const TX_SIZE tx_size = TX_32X64; - __m128i buf0[64], buf1[256]; - const int8_t *shift = fwd_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx]; - const int width = tx_size_wide[tx_size]; - const int height = tx_size_high[tx_size]; - const transform_1d_sse2 col_txfm = fdct8x64_new_sse2; - const int width_div8 = (width >> 3); - const int height_div8 = (height >> 3); - - for (int i = 0; i < width_div8; i++) { - load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height); - round_shift_16bit(buf0, height, shift[0]); - col_txfm(buf0, buf0, cos_bit_col); - round_shift_16bit(buf0, height, shift[1]); - for (int j = 0; j < AOMMIN(4, height_div8); ++j) { - transpose_16bit_8x8(buf0 + j * 8, buf1 + j * width + 8 * i); - } - } - - for (int i = 0; i < AOMMIN(4, height_div8); i++) { - __m128i bufA[32]; - __m128i bufB[32]; - __m128i *buf = buf1 + width * i; - for (int j = 0; j < width; ++j) { - bufA[j] = _mm_cvtepi16_epi32(buf[j]); - bufB[j] = _mm_cvtepi16_epi32(_mm_unpackhi_epi64(buf[j], buf[j])); - } - av1_fdct32_new_sse4_1(bufA, bufA, cos_bit_row); - av1_fdct32_new_sse4_1(bufB, bufB, cos_bit_row); - av1_round_shift_rect_array_32_sse4_1(bufA, bufA, 32, -shift[2], NewSqrt2); - av1_round_shift_rect_array_32_sse4_1(bufB, bufB, 32, -shift[2], 
NewSqrt2); - - int32_t *output8 = output + 8 * 32 * i; - for (int j = 0; j < (32 / 4); ++j) { - __m128i *out = (__m128i *)(output8 + 4 * j); - transpose_32_4x4x2(8, bufA + 4 * j, bufB + 4 * j, out); - } - } -} - -static FwdTxfm2dFunc fwd_txfm2d_func_ls[TX_SIZES_ALL] = { - av1_lowbd_fwd_txfm2d_4x4_sse2, // 4x4 transform - av1_lowbd_fwd_txfm2d_8x8_sse2, // 8x8 transform - av1_lowbd_fwd_txfm2d_16x16_sse2, // 16x16 transform - av1_lowbd_fwd_txfm2d_32x32_sse2, // 32x32 transform - lowbd_fwd_txfm2d_64x64_sse4_1, // 64x64 transform - av1_lowbd_fwd_txfm2d_4x8_sse2, // 4x8 transform - av1_lowbd_fwd_txfm2d_8x4_sse2, // 8x4 transform - av1_lowbd_fwd_txfm2d_8x16_sse2, // 8x16 transform - av1_lowbd_fwd_txfm2d_16x8_sse2, // 16x8 transform - av1_lowbd_fwd_txfm2d_16x32_sse2, // 16x32 transform - av1_lowbd_fwd_txfm2d_32x16_sse2, // 32x16 transform - lowbd_fwd_txfm2d_32x64_sse4_1, // 32x64 transform - lowbd_fwd_txfm2d_64x32_sse4_1, // 64x32 transform - av1_lowbd_fwd_txfm2d_4x16_sse2, // 4x16 transform - av1_lowbd_fwd_txfm2d_16x4_sse2, // 16x4 transform - av1_lowbd_fwd_txfm2d_8x32_sse2, // 8x32 transform - av1_lowbd_fwd_txfm2d_32x8_sse2, // 32x8 transform - av1_lowbd_fwd_txfm2d_16x64_sse2, // 16x64 transform - av1_lowbd_fwd_txfm2d_64x16_sse2, // 64x16 transform -}; - -void av1_lowbd_fwd_txfm_sse4_1(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TxfmParam *txfm_param) { - FwdTxfm2dFunc fwd_txfm2d_func = fwd_txfm2d_func_ls[txfm_param->tx_size]; - if ((fwd_txfm2d_func == NULL) || - (txfm_param->lossless && txfm_param->tx_size == TX_4X4)) { - av1_lowbd_fwd_txfm_c(src_diff, coeff, diff_stride, txfm_param); - } else { - fwd_txfm2d_func(src_diff, coeff, diff_stride, txfm_param->tx_type, - txfm_param->bd); - } -} diff --git a/third_party/aom/av1/encoder/x86/av1_fwd_txfm_avx2.h b/third_party/aom/av1/encoder/x86/av1_fwd_txfm_avx2.h deleted file mode 100644 index 38707137c..000000000 --- a/third_party/aom/av1/encoder/x86/av1_fwd_txfm_avx2.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - 
* Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#ifndef AOM_AV1_ENCODER_X86_AV1_FWD_TXFM_AVX2_H_ -#define AOM_AV1_ENCODER_X86_AV1_FWD_TXFM_AVX2_H_ -#include - -static INLINE __m256i av1_round_shift_32_avx2(__m256i vec, int bit) { - __m256i tmp, round; - round = _mm256_set1_epi32(1 << (bit - 1)); - tmp = _mm256_add_epi32(vec, round); - return _mm256_srai_epi32(tmp, bit); -} - -// out0 = in0*w0 + in1*w1 -// out1 = -in1*w0 + in0*w1 -static INLINE void btf_32_avx2_type0(const int32_t w0, const int32_t w1, - __m256i *in0, __m256i *in1, - const __m256i _r, const int32_t cos_bit) { - __m256i _in0 = *in0; - __m256i _in1 = *in1; - const __m256i ww0 = _mm256_set1_epi32(w0); - const __m256i ww1 = _mm256_set1_epi32(w1); - const __m256i in0_w0 = _mm256_mullo_epi32(_in0, ww0); - const __m256i in1_w1 = _mm256_mullo_epi32(_in1, ww1); - __m256i temp0 = _mm256_add_epi32(in0_w0, in1_w1); - temp0 = _mm256_add_epi32(temp0, _r); - *in0 = _mm256_srai_epi32(temp0, cos_bit); - const __m256i in0_w1 = _mm256_mullo_epi32(_in0, ww1); - const __m256i in1_w0 = _mm256_mullo_epi32(_in1, ww0); - __m256i temp1 = _mm256_sub_epi32(in0_w1, in1_w0); - temp1 = _mm256_add_epi32(temp1, _r); - *in1 = _mm256_srai_epi32(temp1, cos_bit); -} - -static INLINE void btf_32_avx2_type1(const int32_t w0, const int32_t w1, - __m256i *in0, __m256i *in1, - const __m256i _r, const int32_t cos_bit) { - __m256i _in0 = *in0; - __m256i _in1 = *in1; - const __m256i ww0 = _mm256_set1_epi32(w0); - const __m256i ww1 = _mm256_set1_epi32(w1); - const 
__m256i in0_w0 = _mm256_mullo_epi32(_in0, ww0); - const __m256i in1_w1 = _mm256_mullo_epi32(_in1, ww1); - __m256i temp0 = _mm256_add_epi32(in0_w0, in1_w1); - temp0 = _mm256_add_epi32(temp0, _r); - *in0 = _mm256_srai_epi32(temp0, cos_bit); - const __m256i in0_w1 = _mm256_mullo_epi32(_in0, ww1); - const __m256i in1_w0 = _mm256_mullo_epi32(_in1, ww0); - __m256i temp1 = _mm256_sub_epi32(in1_w0, in0_w1); - temp1 = _mm256_add_epi32(temp1, _r); - *in1 = _mm256_srai_epi32(temp1, cos_bit); -} - -// out0 = in0*w0 + in1*w1 -// out1 = -in1*w0 + in0*w1 -static INLINE void btf_32_avx2_type0_new(const __m256i ww0, const __m256i ww1, - __m256i *in0, __m256i *in1, - const __m256i _r, - const int32_t cos_bit) { - __m256i _in0 = *in0; - __m256i _in1 = *in1; - const __m256i in0_w0 = _mm256_mullo_epi32(_in0, ww0); - const __m256i in1_w1 = _mm256_mullo_epi32(_in1, ww1); - __m256i temp0 = _mm256_add_epi32(in0_w0, in1_w1); - temp0 = _mm256_add_epi32(temp0, _r); - *in0 = _mm256_srai_epi32(temp0, cos_bit); - const __m256i in0_w1 = _mm256_mullo_epi32(_in0, ww1); - const __m256i in1_w0 = _mm256_mullo_epi32(_in1, ww0); - __m256i temp1 = _mm256_sub_epi32(in0_w1, in1_w0); - temp1 = _mm256_add_epi32(temp1, _r); - *in1 = _mm256_srai_epi32(temp1, cos_bit); -} - -// out0 = in0*w0 + in1*w1 -// out1 = in1*w0 - in0*w1 -static INLINE void btf_32_avx2_type1_new(const __m256i ww0, const __m256i ww1, - __m256i *in0, __m256i *in1, - const __m256i _r, - const int32_t cos_bit) { - __m256i _in0 = *in0; - __m256i _in1 = *in1; - const __m256i in0_w0 = _mm256_mullo_epi32(_in0, ww0); - const __m256i in1_w1 = _mm256_mullo_epi32(_in1, ww1); - __m256i temp0 = _mm256_add_epi32(in0_w0, in1_w1); - temp0 = _mm256_add_epi32(temp0, _r); - *in0 = _mm256_srai_epi32(temp0, cos_bit); - const __m256i in0_w1 = _mm256_mullo_epi32(_in0, ww1); - const __m256i in1_w0 = _mm256_mullo_epi32(_in1, ww0); - __m256i temp1 = _mm256_sub_epi32(in1_w0, in0_w1); - temp1 = _mm256_add_epi32(temp1, _r); - *in1 = _mm256_srai_epi32(temp1, cos_bit); 
-} - -#endif // AOM_AV1_ENCODER_X86_AV1_FWD_TXFM_AVX2_H_ diff --git a/third_party/aom/av1/encoder/x86/av1_fwd_txfm_sse2.c b/third_party/aom/av1/encoder/x86/av1_fwd_txfm_sse2.c deleted file mode 100644 index 6aae7ce1e..000000000 --- a/third_party/aom/av1/encoder/x86/av1_fwd_txfm_sse2.c +++ /dev/null @@ -1,2889 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include "av1/common/x86/av1_txfm_sse2.h" -#include "av1/encoder/av1_fwd_txfm1d_cfg.h" -#include "av1/encoder/x86/av1_fwd_txfm_sse2.h" - -// TODO(linfengz): refine fdct4x8 and fadst4x8 optimization (if possible). 
- -static void fdct4x4_new_sse2(const __m128i *input, __m128i *output, - int8_t cos_bit) { - const int32_t *cospi = cospi_arr(cos_bit); - const __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - const __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]); - const __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]); - const __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]); - const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1)); - __m128i u[4], v[4]; - - u[0] = _mm_unpacklo_epi16(input[0], input[1]); - u[1] = _mm_unpacklo_epi16(input[3], input[2]); - - v[0] = _mm_add_epi16(u[0], u[1]); - v[1] = _mm_sub_epi16(u[0], u[1]); - - u[0] = _mm_madd_epi16(v[0], cospi_p32_p32); // 0 - u[1] = _mm_madd_epi16(v[0], cospi_p32_m32); // 2 - u[2] = _mm_madd_epi16(v[1], cospi_p16_p48); // 1 - u[3] = _mm_madd_epi16(v[1], cospi_p48_m16); // 3 - - v[0] = _mm_add_epi32(u[0], __rounding); - v[1] = _mm_add_epi32(u[1], __rounding); - v[2] = _mm_add_epi32(u[2], __rounding); - v[3] = _mm_add_epi32(u[3], __rounding); - u[0] = _mm_srai_epi32(v[0], cos_bit); - u[1] = _mm_srai_epi32(v[1], cos_bit); - u[2] = _mm_srai_epi32(v[2], cos_bit); - u[3] = _mm_srai_epi32(v[3], cos_bit); - - output[0] = _mm_packs_epi32(u[0], u[1]); - output[1] = _mm_packs_epi32(u[2], u[3]); - output[2] = _mm_srli_si128(output[0], 8); - output[3] = _mm_srli_si128(output[1], 8); -} - -static void fdct8x4_new_sse2(const __m128i *input, __m128i *output, - int8_t cos_bit) { - const int32_t *cospi = cospi_arr(cos_bit); - const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1)); - - __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]); - __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]); - __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]); - - // stage 1 - __m128i x1[4]; - x1[0] = _mm_adds_epi16(input[0], input[3]); - x1[3] = _mm_subs_epi16(input[0], input[3]); - x1[1] = 
_mm_adds_epi16(input[1], input[2]); - x1[2] = _mm_subs_epi16(input[1], input[2]); - - // stage 2 - __m128i x2[4]; - btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x1[0], x1[1], x2[0], x2[1]); - btf_16_sse2(cospi_p48_p16, cospi_m16_p48, x1[2], x1[3], x2[2], x2[3]); - - // stage 3 - output[0] = x2[0]; - output[1] = x2[2]; - output[2] = x2[1]; - output[3] = x2[3]; -} - -static void fdct4x8_new_sse2(const __m128i *input, __m128i *output, - int8_t cos_bit) { - const int32_t *cospi = cospi_arr(cos_bit); - const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1)); - - __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]); - __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]); - __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]); - __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]); - __m128i cospi_p56_p08 = pair_set_epi16(cospi[56], cospi[8]); - __m128i cospi_m08_p56 = pair_set_epi16(-cospi[8], cospi[56]); - __m128i cospi_p24_p40 = pair_set_epi16(cospi[24], cospi[40]); - __m128i cospi_m40_p24 = pair_set_epi16(-cospi[40], cospi[24]); - - // stage 1 - __m128i x1[8]; - x1[0] = _mm_adds_epi16(input[0], input[7]); - x1[7] = _mm_subs_epi16(input[0], input[7]); - x1[1] = _mm_adds_epi16(input[1], input[6]); - x1[6] = _mm_subs_epi16(input[1], input[6]); - x1[2] = _mm_adds_epi16(input[2], input[5]); - x1[5] = _mm_subs_epi16(input[2], input[5]); - x1[3] = _mm_adds_epi16(input[3], input[4]); - x1[4] = _mm_subs_epi16(input[3], input[4]); - - // stage 2 - __m128i x2[8]; - x2[0] = _mm_adds_epi16(x1[0], x1[3]); - x2[3] = _mm_subs_epi16(x1[0], x1[3]); - x2[1] = _mm_adds_epi16(x1[1], x1[2]); - x2[2] = _mm_subs_epi16(x1[1], x1[2]); - x2[4] = x1[4]; - btf_16_w4_sse2(&cospi_m32_p32, &cospi_p32_p32, __rounding, cos_bit, &x1[5], - &x1[6], &x2[5], &x2[6]); - x2[7] = x1[7]; - - // stage 3 - __m128i x3[8]; - btf_16_w4_sse2(&cospi_p32_p32, &cospi_p32_m32, __rounding, cos_bit, &x2[0], - &x2[1], 
&x3[0], &x3[1]); - btf_16_w4_sse2(&cospi_p48_p16, &cospi_m16_p48, __rounding, cos_bit, &x2[2], - &x2[3], &x3[2], &x3[3]); - x3[4] = _mm_adds_epi16(x2[4], x2[5]); - x3[5] = _mm_subs_epi16(x2[4], x2[5]); - x3[6] = _mm_subs_epi16(x2[7], x2[6]); - x3[7] = _mm_adds_epi16(x2[7], x2[6]); - - // stage 4 - __m128i x4[8]; - x4[0] = x3[0]; - x4[1] = x3[1]; - x4[2] = x3[2]; - x4[3] = x3[3]; - btf_16_w4_sse2(&cospi_p56_p08, &cospi_m08_p56, __rounding, cos_bit, &x3[4], - &x3[7], &x4[4], &x4[7]); - btf_16_w4_sse2(&cospi_p24_p40, &cospi_m40_p24, __rounding, cos_bit, &x3[5], - &x3[6], &x4[5], &x4[6]); - - // stage 5 - output[0] = x4[0]; - output[1] = x4[4]; - output[2] = x4[2]; - output[3] = x4[6]; - output[4] = x4[1]; - output[5] = x4[5]; - output[6] = x4[3]; - output[7] = x4[7]; -} - -static void fdct8x8_new_sse2(const __m128i *input, __m128i *output, - int8_t cos_bit) { - const int32_t *cospi = cospi_arr(cos_bit); - const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1)); - - __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]); - __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]); - __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]); - __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]); - __m128i cospi_p56_p08 = pair_set_epi16(cospi[56], cospi[8]); - __m128i cospi_m08_p56 = pair_set_epi16(-cospi[8], cospi[56]); - __m128i cospi_p24_p40 = pair_set_epi16(cospi[24], cospi[40]); - __m128i cospi_m40_p24 = pair_set_epi16(-cospi[40], cospi[24]); - - // stage 1 - __m128i x1[8]; - x1[0] = _mm_adds_epi16(input[0], input[7]); - x1[7] = _mm_subs_epi16(input[0], input[7]); - x1[1] = _mm_adds_epi16(input[1], input[6]); - x1[6] = _mm_subs_epi16(input[1], input[6]); - x1[2] = _mm_adds_epi16(input[2], input[5]); - x1[5] = _mm_subs_epi16(input[2], input[5]); - x1[3] = _mm_adds_epi16(input[3], input[4]); - x1[4] = _mm_subs_epi16(input[3], input[4]); - - // stage 2 - __m128i x2[8]; - x2[0] 
= _mm_adds_epi16(x1[0], x1[3]); - x2[3] = _mm_subs_epi16(x1[0], x1[3]); - x2[1] = _mm_adds_epi16(x1[1], x1[2]); - x2[2] = _mm_subs_epi16(x1[1], x1[2]); - x2[4] = x1[4]; - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[5], x1[6], x2[5], x2[6]); - x2[7] = x1[7]; - - // stage 3 - __m128i x3[8]; - btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x2[0], x2[1], x3[0], x3[1]); - btf_16_sse2(cospi_p48_p16, cospi_m16_p48, x2[2], x2[3], x3[2], x3[3]); - x3[4] = _mm_adds_epi16(x2[4], x2[5]); - x3[5] = _mm_subs_epi16(x2[4], x2[5]); - x3[6] = _mm_subs_epi16(x2[7], x2[6]); - x3[7] = _mm_adds_epi16(x2[7], x2[6]); - - // stage 4 - __m128i x4[8]; - x4[0] = x3[0]; - x4[1] = x3[1]; - x4[2] = x3[2]; - x4[3] = x3[3]; - btf_16_sse2(cospi_p56_p08, cospi_m08_p56, x3[4], x3[7], x4[4], x4[7]); - btf_16_sse2(cospi_p24_p40, cospi_m40_p24, x3[5], x3[6], x4[5], x4[6]); - - // stage 5 - output[0] = x4[0]; - output[1] = x4[4]; - output[2] = x4[2]; - output[3] = x4[6]; - output[4] = x4[1]; - output[5] = x4[5]; - output[6] = x4[3]; - output[7] = x4[7]; -} - -static void fdct8x16_new_sse2(const __m128i *input, __m128i *output, - int8_t cos_bit) { - const int32_t *cospi = cospi_arr(cos_bit); - const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1)); - - __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]); - __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]); - __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]); - __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]); - __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]); - __m128i cospi_p56_p08 = pair_set_epi16(cospi[56], cospi[8]); - __m128i cospi_m08_p56 = pair_set_epi16(-cospi[8], cospi[56]); - __m128i cospi_p24_p40 = pair_set_epi16(cospi[24], cospi[40]); - __m128i cospi_m40_p24 = pair_set_epi16(-cospi[40], cospi[24]); - __m128i cospi_p60_p04 = pair_set_epi16(cospi[60], cospi[4]); - __m128i cospi_m04_p60 = pair_set_epi16(-cospi[4], 
cospi[60]); - __m128i cospi_p28_p36 = pair_set_epi16(cospi[28], cospi[36]); - __m128i cospi_m36_p28 = pair_set_epi16(-cospi[36], cospi[28]); - __m128i cospi_p44_p20 = pair_set_epi16(cospi[44], cospi[20]); - __m128i cospi_m20_p44 = pair_set_epi16(-cospi[20], cospi[44]); - __m128i cospi_p12_p52 = pair_set_epi16(cospi[12], cospi[52]); - __m128i cospi_m52_p12 = pair_set_epi16(-cospi[52], cospi[12]); - - // stage 1 - __m128i x1[16]; - x1[0] = _mm_adds_epi16(input[0], input[15]); - x1[15] = _mm_subs_epi16(input[0], input[15]); - x1[1] = _mm_adds_epi16(input[1], input[14]); - x1[14] = _mm_subs_epi16(input[1], input[14]); - x1[2] = _mm_adds_epi16(input[2], input[13]); - x1[13] = _mm_subs_epi16(input[2], input[13]); - x1[3] = _mm_adds_epi16(input[3], input[12]); - x1[12] = _mm_subs_epi16(input[3], input[12]); - x1[4] = _mm_adds_epi16(input[4], input[11]); - x1[11] = _mm_subs_epi16(input[4], input[11]); - x1[5] = _mm_adds_epi16(input[5], input[10]); - x1[10] = _mm_subs_epi16(input[5], input[10]); - x1[6] = _mm_adds_epi16(input[6], input[9]); - x1[9] = _mm_subs_epi16(input[6], input[9]); - x1[7] = _mm_adds_epi16(input[7], input[8]); - x1[8] = _mm_subs_epi16(input[7], input[8]); - - // stage 2 - __m128i x2[16]; - x2[0] = _mm_adds_epi16(x1[0], x1[7]); - x2[7] = _mm_subs_epi16(x1[0], x1[7]); - x2[1] = _mm_adds_epi16(x1[1], x1[6]); - x2[6] = _mm_subs_epi16(x1[1], x1[6]); - x2[2] = _mm_adds_epi16(x1[2], x1[5]); - x2[5] = _mm_subs_epi16(x1[2], x1[5]); - x2[3] = _mm_adds_epi16(x1[3], x1[4]); - x2[4] = _mm_subs_epi16(x1[3], x1[4]); - x2[8] = x1[8]; - x2[9] = x1[9]; - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[10], x1[13], x2[10], x2[13]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[11], x1[12], x2[11], x2[12]); - x2[14] = x1[14]; - x2[15] = x1[15]; - - // stage 3 - __m128i x3[16]; - x3[0] = _mm_adds_epi16(x2[0], x2[3]); - x3[3] = _mm_subs_epi16(x2[0], x2[3]); - x3[1] = _mm_adds_epi16(x2[1], x2[2]); - x3[2] = _mm_subs_epi16(x2[1], x2[2]); - x3[4] = x2[4]; - 
btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x2[5], x2[6], x3[5], x3[6]); - x3[7] = x2[7]; - x3[8] = _mm_adds_epi16(x2[8], x2[11]); - x3[11] = _mm_subs_epi16(x2[8], x2[11]); - x3[9] = _mm_adds_epi16(x2[9], x2[10]); - x3[10] = _mm_subs_epi16(x2[9], x2[10]); - x3[12] = _mm_subs_epi16(x2[15], x2[12]); - x3[15] = _mm_adds_epi16(x2[15], x2[12]); - x3[13] = _mm_subs_epi16(x2[14], x2[13]); - x3[14] = _mm_adds_epi16(x2[14], x2[13]); - - // stage 4 - __m128i x4[16]; - btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x3[0], x3[1], x4[0], x4[1]); - btf_16_sse2(cospi_p48_p16, cospi_m16_p48, x3[2], x3[3], x4[2], x4[3]); - x4[4] = _mm_adds_epi16(x3[4], x3[5]); - x4[5] = _mm_subs_epi16(x3[4], x3[5]); - x4[6] = _mm_subs_epi16(x3[7], x3[6]); - x4[7] = _mm_adds_epi16(x3[7], x3[6]); - x4[8] = x3[8]; - btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x3[9], x3[14], x4[9], x4[14]); - btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x3[10], x3[13], x4[10], x4[13]); - x4[11] = x3[11]; - x4[12] = x3[12]; - x4[15] = x3[15]; - - // stage 5 - __m128i x5[16]; - x5[0] = x4[0]; - x5[1] = x4[1]; - x5[2] = x4[2]; - x5[3] = x4[3]; - btf_16_sse2(cospi_p56_p08, cospi_m08_p56, x4[4], x4[7], x5[4], x5[7]); - btf_16_sse2(cospi_p24_p40, cospi_m40_p24, x4[5], x4[6], x5[5], x5[6]); - x5[8] = _mm_adds_epi16(x4[8], x4[9]); - x5[9] = _mm_subs_epi16(x4[8], x4[9]); - x5[10] = _mm_subs_epi16(x4[11], x4[10]); - x5[11] = _mm_adds_epi16(x4[11], x4[10]); - x5[12] = _mm_adds_epi16(x4[12], x4[13]); - x5[13] = _mm_subs_epi16(x4[12], x4[13]); - x5[14] = _mm_subs_epi16(x4[15], x4[14]); - x5[15] = _mm_adds_epi16(x4[15], x4[14]); - - // stage 6 - __m128i x6[16]; - x6[0] = x5[0]; - x6[1] = x5[1]; - x6[2] = x5[2]; - x6[3] = x5[3]; - x6[4] = x5[4]; - x6[5] = x5[5]; - x6[6] = x5[6]; - x6[7] = x5[7]; - btf_16_sse2(cospi_p60_p04, cospi_m04_p60, x5[8], x5[15], x6[8], x6[15]); - btf_16_sse2(cospi_p28_p36, cospi_m36_p28, x5[9], x5[14], x6[9], x6[14]); - btf_16_sse2(cospi_p44_p20, cospi_m20_p44, x5[10], x5[13], x6[10], x6[13]); - 
btf_16_sse2(cospi_p12_p52, cospi_m52_p12, x5[11], x5[12], x6[11], x6[12]); - - // stage 7 - output[0] = x6[0]; - output[1] = x6[8]; - output[2] = x6[4]; - output[3] = x6[12]; - output[4] = x6[2]; - output[5] = x6[10]; - output[6] = x6[6]; - output[7] = x6[14]; - output[8] = x6[1]; - output[9] = x6[9]; - output[10] = x6[5]; - output[11] = x6[13]; - output[12] = x6[3]; - output[13] = x6[11]; - output[14] = x6[7]; - output[15] = x6[15]; -} - -void fdct8x32_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) { - const int32_t *cospi = cospi_arr(cos_bit); - const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1)); - - __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]); - __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]); - __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]); - __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]); - __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]); - __m128i cospi_p56_p08 = pair_set_epi16(cospi[56], cospi[8]); - __m128i cospi_m08_p56 = pair_set_epi16(-cospi[8], cospi[56]); - __m128i cospi_p24_p40 = pair_set_epi16(cospi[24], cospi[40]); - __m128i cospi_m40_p24 = pair_set_epi16(-cospi[40], cospi[24]); - __m128i cospi_m56_m08 = pair_set_epi16(-cospi[56], -cospi[8]); - __m128i cospi_m24_m40 = pair_set_epi16(-cospi[24], -cospi[40]); - __m128i cospi_p60_p04 = pair_set_epi16(cospi[60], cospi[4]); - __m128i cospi_m04_p60 = pair_set_epi16(-cospi[4], cospi[60]); - __m128i cospi_p28_p36 = pair_set_epi16(cospi[28], cospi[36]); - __m128i cospi_m36_p28 = pair_set_epi16(-cospi[36], cospi[28]); - __m128i cospi_p44_p20 = pair_set_epi16(cospi[44], cospi[20]); - __m128i cospi_m20_p44 = pair_set_epi16(-cospi[20], cospi[44]); - __m128i cospi_p12_p52 = pair_set_epi16(cospi[12], cospi[52]); - __m128i cospi_m52_p12 = pair_set_epi16(-cospi[52], cospi[12]); - __m128i cospi_p62_p02 = pair_set_epi16(cospi[62], cospi[2]); - __m128i 
cospi_m02_p62 = pair_set_epi16(-cospi[2], cospi[62]); - __m128i cospi_p30_p34 = pair_set_epi16(cospi[30], cospi[34]); - __m128i cospi_m34_p30 = pair_set_epi16(-cospi[34], cospi[30]); - __m128i cospi_p46_p18 = pair_set_epi16(cospi[46], cospi[18]); - __m128i cospi_m18_p46 = pair_set_epi16(-cospi[18], cospi[46]); - __m128i cospi_p14_p50 = pair_set_epi16(cospi[14], cospi[50]); - __m128i cospi_m50_p14 = pair_set_epi16(-cospi[50], cospi[14]); - __m128i cospi_p54_p10 = pair_set_epi16(cospi[54], cospi[10]); - __m128i cospi_m10_p54 = pair_set_epi16(-cospi[10], cospi[54]); - __m128i cospi_p22_p42 = pair_set_epi16(cospi[22], cospi[42]); - __m128i cospi_m42_p22 = pair_set_epi16(-cospi[42], cospi[22]); - __m128i cospi_p38_p26 = pair_set_epi16(cospi[38], cospi[26]); - __m128i cospi_m26_p38 = pair_set_epi16(-cospi[26], cospi[38]); - __m128i cospi_p06_p58 = pair_set_epi16(cospi[6], cospi[58]); - __m128i cospi_m58_p06 = pair_set_epi16(-cospi[58], cospi[6]); - - // stage 1 - __m128i x1[32]; - x1[0] = _mm_adds_epi16(input[0], input[31]); - x1[31] = _mm_subs_epi16(input[0], input[31]); - x1[1] = _mm_adds_epi16(input[1], input[30]); - x1[30] = _mm_subs_epi16(input[1], input[30]); - x1[2] = _mm_adds_epi16(input[2], input[29]); - x1[29] = _mm_subs_epi16(input[2], input[29]); - x1[3] = _mm_adds_epi16(input[3], input[28]); - x1[28] = _mm_subs_epi16(input[3], input[28]); - x1[4] = _mm_adds_epi16(input[4], input[27]); - x1[27] = _mm_subs_epi16(input[4], input[27]); - x1[5] = _mm_adds_epi16(input[5], input[26]); - x1[26] = _mm_subs_epi16(input[5], input[26]); - x1[6] = _mm_adds_epi16(input[6], input[25]); - x1[25] = _mm_subs_epi16(input[6], input[25]); - x1[7] = _mm_adds_epi16(input[7], input[24]); - x1[24] = _mm_subs_epi16(input[7], input[24]); - x1[8] = _mm_adds_epi16(input[8], input[23]); - x1[23] = _mm_subs_epi16(input[8], input[23]); - x1[9] = _mm_adds_epi16(input[9], input[22]); - x1[22] = _mm_subs_epi16(input[9], input[22]); - x1[10] = _mm_adds_epi16(input[10], input[21]); - x1[21] = 
_mm_subs_epi16(input[10], input[21]); - x1[11] = _mm_adds_epi16(input[11], input[20]); - x1[20] = _mm_subs_epi16(input[11], input[20]); - x1[12] = _mm_adds_epi16(input[12], input[19]); - x1[19] = _mm_subs_epi16(input[12], input[19]); - x1[13] = _mm_adds_epi16(input[13], input[18]); - x1[18] = _mm_subs_epi16(input[13], input[18]); - x1[14] = _mm_adds_epi16(input[14], input[17]); - x1[17] = _mm_subs_epi16(input[14], input[17]); - x1[15] = _mm_adds_epi16(input[15], input[16]); - x1[16] = _mm_subs_epi16(input[15], input[16]); - - // stage 2 - __m128i x2[32]; - x2[0] = _mm_adds_epi16(x1[0], x1[15]); - x2[15] = _mm_subs_epi16(x1[0], x1[15]); - x2[1] = _mm_adds_epi16(x1[1], x1[14]); - x2[14] = _mm_subs_epi16(x1[1], x1[14]); - x2[2] = _mm_adds_epi16(x1[2], x1[13]); - x2[13] = _mm_subs_epi16(x1[2], x1[13]); - x2[3] = _mm_adds_epi16(x1[3], x1[12]); - x2[12] = _mm_subs_epi16(x1[3], x1[12]); - x2[4] = _mm_adds_epi16(x1[4], x1[11]); - x2[11] = _mm_subs_epi16(x1[4], x1[11]); - x2[5] = _mm_adds_epi16(x1[5], x1[10]); - x2[10] = _mm_subs_epi16(x1[5], x1[10]); - x2[6] = _mm_adds_epi16(x1[6], x1[9]); - x2[9] = _mm_subs_epi16(x1[6], x1[9]); - x2[7] = _mm_adds_epi16(x1[7], x1[8]); - x2[8] = _mm_subs_epi16(x1[7], x1[8]); - x2[16] = x1[16]; - x2[17] = x1[17]; - x2[18] = x1[18]; - x2[19] = x1[19]; - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[20], x1[27], x2[20], x2[27]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[21], x1[26], x2[21], x2[26]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[22], x1[25], x2[22], x2[25]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[23], x1[24], x2[23], x2[24]); - x2[28] = x1[28]; - x2[29] = x1[29]; - x2[30] = x1[30]; - x2[31] = x1[31]; - - // stage 3 - __m128i x3[32]; - x3[0] = _mm_adds_epi16(x2[0], x2[7]); - x3[7] = _mm_subs_epi16(x2[0], x2[7]); - x3[1] = _mm_adds_epi16(x2[1], x2[6]); - x3[6] = _mm_subs_epi16(x2[1], x2[6]); - x3[2] = _mm_adds_epi16(x2[2], x2[5]); - x3[5] = _mm_subs_epi16(x2[2], x2[5]); - x3[3] = _mm_adds_epi16(x2[3], x2[4]); - x3[4] 
= _mm_subs_epi16(x2[3], x2[4]); - x3[8] = x2[8]; - x3[9] = x2[9]; - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x2[10], x2[13], x3[10], x3[13]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x2[11], x2[12], x3[11], x3[12]); - x3[14] = x2[14]; - x3[15] = x2[15]; - x3[16] = _mm_adds_epi16(x2[16], x2[23]); - x3[23] = _mm_subs_epi16(x2[16], x2[23]); - x3[17] = _mm_adds_epi16(x2[17], x2[22]); - x3[22] = _mm_subs_epi16(x2[17], x2[22]); - x3[18] = _mm_adds_epi16(x2[18], x2[21]); - x3[21] = _mm_subs_epi16(x2[18], x2[21]); - x3[19] = _mm_adds_epi16(x2[19], x2[20]); - x3[20] = _mm_subs_epi16(x2[19], x2[20]); - x3[24] = _mm_subs_epi16(x2[31], x2[24]); - x3[31] = _mm_adds_epi16(x2[31], x2[24]); - x3[25] = _mm_subs_epi16(x2[30], x2[25]); - x3[30] = _mm_adds_epi16(x2[30], x2[25]); - x3[26] = _mm_subs_epi16(x2[29], x2[26]); - x3[29] = _mm_adds_epi16(x2[29], x2[26]); - x3[27] = _mm_subs_epi16(x2[28], x2[27]); - x3[28] = _mm_adds_epi16(x2[28], x2[27]); - - // stage 4 - __m128i x4[32]; - x4[0] = _mm_adds_epi16(x3[0], x3[3]); - x4[3] = _mm_subs_epi16(x3[0], x3[3]); - x4[1] = _mm_adds_epi16(x3[1], x3[2]); - x4[2] = _mm_subs_epi16(x3[1], x3[2]); - x4[4] = x3[4]; - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x3[5], x3[6], x4[5], x4[6]); - x4[7] = x3[7]; - x4[8] = _mm_adds_epi16(x3[8], x3[11]); - x4[11] = _mm_subs_epi16(x3[8], x3[11]); - x4[9] = _mm_adds_epi16(x3[9], x3[10]); - x4[10] = _mm_subs_epi16(x3[9], x3[10]); - x4[12] = _mm_subs_epi16(x3[15], x3[12]); - x4[15] = _mm_adds_epi16(x3[15], x3[12]); - x4[13] = _mm_subs_epi16(x3[14], x3[13]); - x4[14] = _mm_adds_epi16(x3[14], x3[13]); - x4[16] = x3[16]; - x4[17] = x3[17]; - btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x3[18], x3[29], x4[18], x4[29]); - btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x3[19], x3[28], x4[19], x4[28]); - btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x3[20], x3[27], x4[20], x4[27]); - btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x3[21], x3[26], x4[21], x4[26]); - x4[22] = x3[22]; - x4[23] = x3[23]; - x4[24] = x3[24]; - x4[25] = 
x3[25]; - x4[30] = x3[30]; - x4[31] = x3[31]; - - // stage 5 - __m128i x5[32]; - btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x4[0], x4[1], x5[0], x5[1]); - btf_16_sse2(cospi_p48_p16, cospi_m16_p48, x4[2], x4[3], x5[2], x5[3]); - x5[4] = _mm_adds_epi16(x4[4], x4[5]); - x5[5] = _mm_subs_epi16(x4[4], x4[5]); - x5[6] = _mm_subs_epi16(x4[7], x4[6]); - x5[7] = _mm_adds_epi16(x4[7], x4[6]); - x5[8] = x4[8]; - btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x4[9], x4[14], x5[9], x5[14]); - btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x4[10], x4[13], x5[10], x5[13]); - x5[11] = x4[11]; - x5[12] = x4[12]; - x5[15] = x4[15]; - x5[16] = _mm_adds_epi16(x4[16], x4[19]); - x5[19] = _mm_subs_epi16(x4[16], x4[19]); - x5[17] = _mm_adds_epi16(x4[17], x4[18]); - x5[18] = _mm_subs_epi16(x4[17], x4[18]); - x5[20] = _mm_subs_epi16(x4[23], x4[20]); - x5[23] = _mm_adds_epi16(x4[23], x4[20]); - x5[21] = _mm_subs_epi16(x4[22], x4[21]); - x5[22] = _mm_adds_epi16(x4[22], x4[21]); - x5[24] = _mm_adds_epi16(x4[24], x4[27]); - x5[27] = _mm_subs_epi16(x4[24], x4[27]); - x5[25] = _mm_adds_epi16(x4[25], x4[26]); - x5[26] = _mm_subs_epi16(x4[25], x4[26]); - x5[28] = _mm_subs_epi16(x4[31], x4[28]); - x5[31] = _mm_adds_epi16(x4[31], x4[28]); - x5[29] = _mm_subs_epi16(x4[30], x4[29]); - x5[30] = _mm_adds_epi16(x4[30], x4[29]); - - // stage 6 - __m128i x6[32]; - x6[0] = x5[0]; - x6[1] = x5[1]; - x6[2] = x5[2]; - x6[3] = x5[3]; - btf_16_sse2(cospi_p56_p08, cospi_m08_p56, x5[4], x5[7], x6[4], x6[7]); - btf_16_sse2(cospi_p24_p40, cospi_m40_p24, x5[5], x5[6], x6[5], x6[6]); - x6[8] = _mm_adds_epi16(x5[8], x5[9]); - x6[9] = _mm_subs_epi16(x5[8], x5[9]); - x6[10] = _mm_subs_epi16(x5[11], x5[10]); - x6[11] = _mm_adds_epi16(x5[11], x5[10]); - x6[12] = _mm_adds_epi16(x5[12], x5[13]); - x6[13] = _mm_subs_epi16(x5[12], x5[13]); - x6[14] = _mm_subs_epi16(x5[15], x5[14]); - x6[15] = _mm_adds_epi16(x5[15], x5[14]); - x6[16] = x5[16]; - btf_16_sse2(cospi_m08_p56, cospi_p56_p08, x5[17], x5[30], x6[17], x6[30]); - 
btf_16_sse2(cospi_m56_m08, cospi_m08_p56, x5[18], x5[29], x6[18], x6[29]); - x6[19] = x5[19]; - x6[20] = x5[20]; - btf_16_sse2(cospi_m40_p24, cospi_p24_p40, x5[21], x5[26], x6[21], x6[26]); - btf_16_sse2(cospi_m24_m40, cospi_m40_p24, x5[22], x5[25], x6[22], x6[25]); - x6[23] = x5[23]; - x6[24] = x5[24]; - x6[27] = x5[27]; - x6[28] = x5[28]; - x6[31] = x5[31]; - - // stage 7 - __m128i x7[32]; - x7[0] = x6[0]; - x7[1] = x6[1]; - x7[2] = x6[2]; - x7[3] = x6[3]; - x7[4] = x6[4]; - x7[5] = x6[5]; - x7[6] = x6[6]; - x7[7] = x6[7]; - btf_16_sse2(cospi_p60_p04, cospi_m04_p60, x6[8], x6[15], x7[8], x7[15]); - btf_16_sse2(cospi_p28_p36, cospi_m36_p28, x6[9], x6[14], x7[9], x7[14]); - btf_16_sse2(cospi_p44_p20, cospi_m20_p44, x6[10], x6[13], x7[10], x7[13]); - btf_16_sse2(cospi_p12_p52, cospi_m52_p12, x6[11], x6[12], x7[11], x7[12]); - x7[16] = _mm_adds_epi16(x6[16], x6[17]); - x7[17] = _mm_subs_epi16(x6[16], x6[17]); - x7[18] = _mm_subs_epi16(x6[19], x6[18]); - x7[19] = _mm_adds_epi16(x6[19], x6[18]); - x7[20] = _mm_adds_epi16(x6[20], x6[21]); - x7[21] = _mm_subs_epi16(x6[20], x6[21]); - x7[22] = _mm_subs_epi16(x6[23], x6[22]); - x7[23] = _mm_adds_epi16(x6[23], x6[22]); - x7[24] = _mm_adds_epi16(x6[24], x6[25]); - x7[25] = _mm_subs_epi16(x6[24], x6[25]); - x7[26] = _mm_subs_epi16(x6[27], x6[26]); - x7[27] = _mm_adds_epi16(x6[27], x6[26]); - x7[28] = _mm_adds_epi16(x6[28], x6[29]); - x7[29] = _mm_subs_epi16(x6[28], x6[29]); - x7[30] = _mm_subs_epi16(x6[31], x6[30]); - x7[31] = _mm_adds_epi16(x6[31], x6[30]); - - // stage 8 - __m128i x8[32]; - x8[0] = x7[0]; - x8[1] = x7[1]; - x8[2] = x7[2]; - x8[3] = x7[3]; - x8[4] = x7[4]; - x8[5] = x7[5]; - x8[6] = x7[6]; - x8[7] = x7[7]; - x8[8] = x7[8]; - x8[9] = x7[9]; - x8[10] = x7[10]; - x8[11] = x7[11]; - x8[12] = x7[12]; - x8[13] = x7[13]; - x8[14] = x7[14]; - x8[15] = x7[15]; - btf_16_sse2(cospi_p62_p02, cospi_m02_p62, x7[16], x7[31], x8[16], x8[31]); - btf_16_sse2(cospi_p30_p34, cospi_m34_p30, x7[17], x7[30], x8[17], x8[30]); - 
btf_16_sse2(cospi_p46_p18, cospi_m18_p46, x7[18], x7[29], x8[18], x8[29]); - btf_16_sse2(cospi_p14_p50, cospi_m50_p14, x7[19], x7[28], x8[19], x8[28]); - btf_16_sse2(cospi_p54_p10, cospi_m10_p54, x7[20], x7[27], x8[20], x8[27]); - btf_16_sse2(cospi_p22_p42, cospi_m42_p22, x7[21], x7[26], x8[21], x8[26]); - btf_16_sse2(cospi_p38_p26, cospi_m26_p38, x7[22], x7[25], x8[22], x8[25]); - btf_16_sse2(cospi_p06_p58, cospi_m58_p06, x7[23], x7[24], x8[23], x8[24]); - - // stage 9 - output[0] = x8[0]; - output[1] = x8[16]; - output[2] = x8[8]; - output[3] = x8[24]; - output[4] = x8[4]; - output[5] = x8[20]; - output[6] = x8[12]; - output[7] = x8[28]; - output[8] = x8[2]; - output[9] = x8[18]; - output[10] = x8[10]; - output[11] = x8[26]; - output[12] = x8[6]; - output[13] = x8[22]; - output[14] = x8[14]; - output[15] = x8[30]; - output[16] = x8[1]; - output[17] = x8[17]; - output[18] = x8[9]; - output[19] = x8[25]; - output[20] = x8[5]; - output[21] = x8[21]; - output[22] = x8[13]; - output[23] = x8[29]; - output[24] = x8[3]; - output[25] = x8[19]; - output[26] = x8[11]; - output[27] = x8[27]; - output[28] = x8[7]; - output[29] = x8[23]; - output[30] = x8[15]; - output[31] = x8[31]; -} - -void fdct8x64_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) { - const int32_t *cospi = cospi_arr(cos_bit); - const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1)); - - __m128i cospi_m32_p32 = pair_set_epi16(-cospi[32], cospi[32]); - __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - __m128i cospi_m16_p48 = pair_set_epi16(-cospi[16], cospi[48]); - __m128i cospi_p48_p16 = pair_set_epi16(cospi[48], cospi[16]); - __m128i cospi_m48_m16 = pair_set_epi16(-cospi[48], -cospi[16]); - __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]); - __m128i cospi_m08_p56 = pair_set_epi16(-cospi[8], cospi[56]); - __m128i cospi_p56_p08 = pair_set_epi16(cospi[56], cospi[8]); - __m128i cospi_m56_m08 = pair_set_epi16(-cospi[56], -cospi[8]); - __m128i cospi_m40_p24 = 
pair_set_epi16(-cospi[40], cospi[24]); - __m128i cospi_p24_p40 = pair_set_epi16(cospi[24], cospi[40]); - __m128i cospi_m24_m40 = pair_set_epi16(-cospi[24], -cospi[40]); - __m128i cospi_p60_p04 = pair_set_epi16(cospi[60], cospi[4]); - __m128i cospi_m04_p60 = pair_set_epi16(-cospi[4], cospi[60]); - __m128i cospi_p28_p36 = pair_set_epi16(cospi[28], cospi[36]); - __m128i cospi_m36_p28 = pair_set_epi16(-cospi[36], cospi[28]); - __m128i cospi_p44_p20 = pair_set_epi16(cospi[44], cospi[20]); - __m128i cospi_m20_p44 = pair_set_epi16(-cospi[20], cospi[44]); - __m128i cospi_p12_p52 = pair_set_epi16(cospi[12], cospi[52]); - __m128i cospi_m52_p12 = pair_set_epi16(-cospi[52], cospi[12]); - __m128i cospi_m60_m04 = pair_set_epi16(-cospi[60], -cospi[4]); - __m128i cospi_m28_m36 = pair_set_epi16(-cospi[28], -cospi[36]); - __m128i cospi_m44_m20 = pair_set_epi16(-cospi[44], -cospi[20]); - __m128i cospi_m12_m52 = pair_set_epi16(-cospi[12], -cospi[52]); - __m128i cospi_p62_p02 = pair_set_epi16(cospi[62], cospi[2]); - __m128i cospi_m02_p62 = pair_set_epi16(-cospi[2], cospi[62]); - __m128i cospi_p30_p34 = pair_set_epi16(cospi[30], cospi[34]); - __m128i cospi_m34_p30 = pair_set_epi16(-cospi[34], cospi[30]); - __m128i cospi_p46_p18 = pair_set_epi16(cospi[46], cospi[18]); - __m128i cospi_m18_p46 = pair_set_epi16(-cospi[18], cospi[46]); - __m128i cospi_p14_p50 = pair_set_epi16(cospi[14], cospi[50]); - __m128i cospi_m50_p14 = pair_set_epi16(-cospi[50], cospi[14]); - __m128i cospi_p54_p10 = pair_set_epi16(cospi[54], cospi[10]); - __m128i cospi_m10_p54 = pair_set_epi16(-cospi[10], cospi[54]); - __m128i cospi_p22_p42 = pair_set_epi16(cospi[22], cospi[42]); - __m128i cospi_m42_p22 = pair_set_epi16(-cospi[42], cospi[22]); - __m128i cospi_p38_p26 = pair_set_epi16(cospi[38], cospi[26]); - __m128i cospi_m26_p38 = pair_set_epi16(-cospi[26], cospi[38]); - __m128i cospi_p06_p58 = pair_set_epi16(cospi[6], cospi[58]); - __m128i cospi_m58_p06 = pair_set_epi16(-cospi[58], cospi[6]); - __m128i cospi_p63_p01 = 
pair_set_epi16(cospi[63], cospi[1]); - __m128i cospi_m01_p63 = pair_set_epi16(-cospi[1], cospi[63]); - __m128i cospi_p31_p33 = pair_set_epi16(cospi[31], cospi[33]); - __m128i cospi_m33_p31 = pair_set_epi16(-cospi[33], cospi[31]); - __m128i cospi_p47_p17 = pair_set_epi16(cospi[47], cospi[17]); - __m128i cospi_m17_p47 = pair_set_epi16(-cospi[17], cospi[47]); - __m128i cospi_p15_p49 = pair_set_epi16(cospi[15], cospi[49]); - __m128i cospi_m49_p15 = pair_set_epi16(-cospi[49], cospi[15]); - __m128i cospi_p55_p09 = pair_set_epi16(cospi[55], cospi[9]); - __m128i cospi_m09_p55 = pair_set_epi16(-cospi[9], cospi[55]); - __m128i cospi_p23_p41 = pair_set_epi16(cospi[23], cospi[41]); - __m128i cospi_m41_p23 = pair_set_epi16(-cospi[41], cospi[23]); - __m128i cospi_p39_p25 = pair_set_epi16(cospi[39], cospi[25]); - __m128i cospi_m25_p39 = pair_set_epi16(-cospi[25], cospi[39]); - __m128i cospi_p07_p57 = pair_set_epi16(cospi[7], cospi[57]); - __m128i cospi_m57_p07 = pair_set_epi16(-cospi[57], cospi[7]); - __m128i cospi_p59_p05 = pair_set_epi16(cospi[59], cospi[5]); - __m128i cospi_m05_p59 = pair_set_epi16(-cospi[5], cospi[59]); - __m128i cospi_p27_p37 = pair_set_epi16(cospi[27], cospi[37]); - __m128i cospi_m37_p27 = pair_set_epi16(-cospi[37], cospi[27]); - __m128i cospi_p43_p21 = pair_set_epi16(cospi[43], cospi[21]); - __m128i cospi_m21_p43 = pair_set_epi16(-cospi[21], cospi[43]); - __m128i cospi_p11_p53 = pair_set_epi16(cospi[11], cospi[53]); - __m128i cospi_m53_p11 = pair_set_epi16(-cospi[53], cospi[11]); - __m128i cospi_p51_p13 = pair_set_epi16(cospi[51], cospi[13]); - __m128i cospi_m13_p51 = pair_set_epi16(-cospi[13], cospi[51]); - __m128i cospi_p19_p45 = pair_set_epi16(cospi[19], cospi[45]); - __m128i cospi_m45_p19 = pair_set_epi16(-cospi[45], cospi[19]); - __m128i cospi_p35_p29 = pair_set_epi16(cospi[35], cospi[29]); - __m128i cospi_m29_p35 = pair_set_epi16(-cospi[29], cospi[35]); - __m128i cospi_p03_p61 = pair_set_epi16(cospi[3], cospi[61]); - __m128i cospi_m61_p03 = 
pair_set_epi16(-cospi[61], cospi[3]); - - // stage 1 - __m128i x1[64]; - x1[0] = _mm_adds_epi16(input[0], input[63]); - x1[63] = _mm_subs_epi16(input[0], input[63]); - x1[1] = _mm_adds_epi16(input[1], input[62]); - x1[62] = _mm_subs_epi16(input[1], input[62]); - x1[2] = _mm_adds_epi16(input[2], input[61]); - x1[61] = _mm_subs_epi16(input[2], input[61]); - x1[3] = _mm_adds_epi16(input[3], input[60]); - x1[60] = _mm_subs_epi16(input[3], input[60]); - x1[4] = _mm_adds_epi16(input[4], input[59]); - x1[59] = _mm_subs_epi16(input[4], input[59]); - x1[5] = _mm_adds_epi16(input[5], input[58]); - x1[58] = _mm_subs_epi16(input[5], input[58]); - x1[6] = _mm_adds_epi16(input[6], input[57]); - x1[57] = _mm_subs_epi16(input[6], input[57]); - x1[7] = _mm_adds_epi16(input[7], input[56]); - x1[56] = _mm_subs_epi16(input[7], input[56]); - x1[8] = _mm_adds_epi16(input[8], input[55]); - x1[55] = _mm_subs_epi16(input[8], input[55]); - x1[9] = _mm_adds_epi16(input[9], input[54]); - x1[54] = _mm_subs_epi16(input[9], input[54]); - x1[10] = _mm_adds_epi16(input[10], input[53]); - x1[53] = _mm_subs_epi16(input[10], input[53]); - x1[11] = _mm_adds_epi16(input[11], input[52]); - x1[52] = _mm_subs_epi16(input[11], input[52]); - x1[12] = _mm_adds_epi16(input[12], input[51]); - x1[51] = _mm_subs_epi16(input[12], input[51]); - x1[13] = _mm_adds_epi16(input[13], input[50]); - x1[50] = _mm_subs_epi16(input[13], input[50]); - x1[14] = _mm_adds_epi16(input[14], input[49]); - x1[49] = _mm_subs_epi16(input[14], input[49]); - x1[15] = _mm_adds_epi16(input[15], input[48]); - x1[48] = _mm_subs_epi16(input[15], input[48]); - x1[16] = _mm_adds_epi16(input[16], input[47]); - x1[47] = _mm_subs_epi16(input[16], input[47]); - x1[17] = _mm_adds_epi16(input[17], input[46]); - x1[46] = _mm_subs_epi16(input[17], input[46]); - x1[18] = _mm_adds_epi16(input[18], input[45]); - x1[45] = _mm_subs_epi16(input[18], input[45]); - x1[19] = _mm_adds_epi16(input[19], input[44]); - x1[44] = _mm_subs_epi16(input[19], 
input[44]); - x1[20] = _mm_adds_epi16(input[20], input[43]); - x1[43] = _mm_subs_epi16(input[20], input[43]); - x1[21] = _mm_adds_epi16(input[21], input[42]); - x1[42] = _mm_subs_epi16(input[21], input[42]); - x1[22] = _mm_adds_epi16(input[22], input[41]); - x1[41] = _mm_subs_epi16(input[22], input[41]); - x1[23] = _mm_adds_epi16(input[23], input[40]); - x1[40] = _mm_subs_epi16(input[23], input[40]); - x1[24] = _mm_adds_epi16(input[24], input[39]); - x1[39] = _mm_subs_epi16(input[24], input[39]); - x1[25] = _mm_adds_epi16(input[25], input[38]); - x1[38] = _mm_subs_epi16(input[25], input[38]); - x1[26] = _mm_adds_epi16(input[26], input[37]); - x1[37] = _mm_subs_epi16(input[26], input[37]); - x1[27] = _mm_adds_epi16(input[27], input[36]); - x1[36] = _mm_subs_epi16(input[27], input[36]); - x1[28] = _mm_adds_epi16(input[28], input[35]); - x1[35] = _mm_subs_epi16(input[28], input[35]); - x1[29] = _mm_adds_epi16(input[29], input[34]); - x1[34] = _mm_subs_epi16(input[29], input[34]); - x1[30] = _mm_adds_epi16(input[30], input[33]); - x1[33] = _mm_subs_epi16(input[30], input[33]); - x1[31] = _mm_adds_epi16(input[31], input[32]); - x1[32] = _mm_subs_epi16(input[31], input[32]); - - // stage 2 - __m128i x2[64]; - x2[0] = _mm_adds_epi16(x1[0], x1[31]); - x2[31] = _mm_subs_epi16(x1[0], x1[31]); - x2[1] = _mm_adds_epi16(x1[1], x1[30]); - x2[30] = _mm_subs_epi16(x1[1], x1[30]); - x2[2] = _mm_adds_epi16(x1[2], x1[29]); - x2[29] = _mm_subs_epi16(x1[2], x1[29]); - x2[3] = _mm_adds_epi16(x1[3], x1[28]); - x2[28] = _mm_subs_epi16(x1[3], x1[28]); - x2[4] = _mm_adds_epi16(x1[4], x1[27]); - x2[27] = _mm_subs_epi16(x1[4], x1[27]); - x2[5] = _mm_adds_epi16(x1[5], x1[26]); - x2[26] = _mm_subs_epi16(x1[5], x1[26]); - x2[6] = _mm_adds_epi16(x1[6], x1[25]); - x2[25] = _mm_subs_epi16(x1[6], x1[25]); - x2[7] = _mm_adds_epi16(x1[7], x1[24]); - x2[24] = _mm_subs_epi16(x1[7], x1[24]); - x2[8] = _mm_adds_epi16(x1[8], x1[23]); - x2[23] = _mm_subs_epi16(x1[8], x1[23]); - x2[9] = _mm_adds_epi16(x1[9], 
x1[22]); - x2[22] = _mm_subs_epi16(x1[9], x1[22]); - x2[10] = _mm_adds_epi16(x1[10], x1[21]); - x2[21] = _mm_subs_epi16(x1[10], x1[21]); - x2[11] = _mm_adds_epi16(x1[11], x1[20]); - x2[20] = _mm_subs_epi16(x1[11], x1[20]); - x2[12] = _mm_adds_epi16(x1[12], x1[19]); - x2[19] = _mm_subs_epi16(x1[12], x1[19]); - x2[13] = _mm_adds_epi16(x1[13], x1[18]); - x2[18] = _mm_subs_epi16(x1[13], x1[18]); - x2[14] = _mm_adds_epi16(x1[14], x1[17]); - x2[17] = _mm_subs_epi16(x1[14], x1[17]); - x2[15] = _mm_adds_epi16(x1[15], x1[16]); - x2[16] = _mm_subs_epi16(x1[15], x1[16]); - x2[32] = x1[32]; - x2[33] = x1[33]; - x2[34] = x1[34]; - x2[35] = x1[35]; - x2[36] = x1[36]; - x2[37] = x1[37]; - x2[38] = x1[38]; - x2[39] = x1[39]; - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[40], x1[55], x2[40], x2[55]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[41], x1[54], x2[41], x2[54]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[42], x1[53], x2[42], x2[53]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[43], x1[52], x2[43], x2[52]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[44], x1[51], x2[44], x2[51]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[45], x1[50], x2[45], x2[50]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[46], x1[49], x2[46], x2[49]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x1[47], x1[48], x2[47], x2[48]); - x2[56] = x1[56]; - x2[57] = x1[57]; - x2[58] = x1[58]; - x2[59] = x1[59]; - x2[60] = x1[60]; - x2[61] = x1[61]; - x2[62] = x1[62]; - x2[63] = x1[63]; - - // stage 3 - __m128i x3[64]; - x3[0] = _mm_adds_epi16(x2[0], x2[15]); - x3[15] = _mm_subs_epi16(x2[0], x2[15]); - x3[1] = _mm_adds_epi16(x2[1], x2[14]); - x3[14] = _mm_subs_epi16(x2[1], x2[14]); - x3[2] = _mm_adds_epi16(x2[2], x2[13]); - x3[13] = _mm_subs_epi16(x2[2], x2[13]); - x3[3] = _mm_adds_epi16(x2[3], x2[12]); - x3[12] = _mm_subs_epi16(x2[3], x2[12]); - x3[4] = _mm_adds_epi16(x2[4], x2[11]); - x3[11] = _mm_subs_epi16(x2[4], x2[11]); - x3[5] = _mm_adds_epi16(x2[5], x2[10]); - x3[10] = 
_mm_subs_epi16(x2[5], x2[10]); - x3[6] = _mm_adds_epi16(x2[6], x2[9]); - x3[9] = _mm_subs_epi16(x2[6], x2[9]); - x3[7] = _mm_adds_epi16(x2[7], x2[8]); - x3[8] = _mm_subs_epi16(x2[7], x2[8]); - x3[16] = x2[16]; - x3[17] = x2[17]; - x3[18] = x2[18]; - x3[19] = x2[19]; - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x2[20], x2[27], x3[20], x3[27]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x2[21], x2[26], x3[21], x3[26]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x2[22], x2[25], x3[22], x3[25]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x2[23], x2[24], x3[23], x3[24]); - x3[28] = x2[28]; - x3[29] = x2[29]; - x3[30] = x2[30]; - x3[31] = x2[31]; - x3[32] = _mm_adds_epi16(x2[32], x2[47]); - x3[47] = _mm_subs_epi16(x2[32], x2[47]); - x3[33] = _mm_adds_epi16(x2[33], x2[46]); - x3[46] = _mm_subs_epi16(x2[33], x2[46]); - x3[34] = _mm_adds_epi16(x2[34], x2[45]); - x3[45] = _mm_subs_epi16(x2[34], x2[45]); - x3[35] = _mm_adds_epi16(x2[35], x2[44]); - x3[44] = _mm_subs_epi16(x2[35], x2[44]); - x3[36] = _mm_adds_epi16(x2[36], x2[43]); - x3[43] = _mm_subs_epi16(x2[36], x2[43]); - x3[37] = _mm_adds_epi16(x2[37], x2[42]); - x3[42] = _mm_subs_epi16(x2[37], x2[42]); - x3[38] = _mm_adds_epi16(x2[38], x2[41]); - x3[41] = _mm_subs_epi16(x2[38], x2[41]); - x3[39] = _mm_adds_epi16(x2[39], x2[40]); - x3[40] = _mm_subs_epi16(x2[39], x2[40]); - x3[48] = _mm_subs_epi16(x2[63], x2[48]); - x3[63] = _mm_adds_epi16(x2[63], x2[48]); - x3[49] = _mm_subs_epi16(x2[62], x2[49]); - x3[62] = _mm_adds_epi16(x2[62], x2[49]); - x3[50] = _mm_subs_epi16(x2[61], x2[50]); - x3[61] = _mm_adds_epi16(x2[61], x2[50]); - x3[51] = _mm_subs_epi16(x2[60], x2[51]); - x3[60] = _mm_adds_epi16(x2[60], x2[51]); - x3[52] = _mm_subs_epi16(x2[59], x2[52]); - x3[59] = _mm_adds_epi16(x2[59], x2[52]); - x3[53] = _mm_subs_epi16(x2[58], x2[53]); - x3[58] = _mm_adds_epi16(x2[58], x2[53]); - x3[54] = _mm_subs_epi16(x2[57], x2[54]); - x3[57] = _mm_adds_epi16(x2[57], x2[54]); - x3[55] = _mm_subs_epi16(x2[56], x2[55]); - x3[56] = 
_mm_adds_epi16(x2[56], x2[55]); - - // stage 4 - __m128i x4[64]; - x4[0] = _mm_adds_epi16(x3[0], x3[7]); - x4[7] = _mm_subs_epi16(x3[0], x3[7]); - x4[1] = _mm_adds_epi16(x3[1], x3[6]); - x4[6] = _mm_subs_epi16(x3[1], x3[6]); - x4[2] = _mm_adds_epi16(x3[2], x3[5]); - x4[5] = _mm_subs_epi16(x3[2], x3[5]); - x4[3] = _mm_adds_epi16(x3[3], x3[4]); - x4[4] = _mm_subs_epi16(x3[3], x3[4]); - x4[8] = x3[8]; - x4[9] = x3[9]; - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x3[10], x3[13], x4[10], x4[13]); - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x3[11], x3[12], x4[11], x4[12]); - x4[14] = x3[14]; - x4[15] = x3[15]; - x4[16] = _mm_adds_epi16(x3[16], x3[23]); - x4[23] = _mm_subs_epi16(x3[16], x3[23]); - x4[17] = _mm_adds_epi16(x3[17], x3[22]); - x4[22] = _mm_subs_epi16(x3[17], x3[22]); - x4[18] = _mm_adds_epi16(x3[18], x3[21]); - x4[21] = _mm_subs_epi16(x3[18], x3[21]); - x4[19] = _mm_adds_epi16(x3[19], x3[20]); - x4[20] = _mm_subs_epi16(x3[19], x3[20]); - x4[24] = _mm_subs_epi16(x3[31], x3[24]); - x4[31] = _mm_adds_epi16(x3[31], x3[24]); - x4[25] = _mm_subs_epi16(x3[30], x3[25]); - x4[30] = _mm_adds_epi16(x3[30], x3[25]); - x4[26] = _mm_subs_epi16(x3[29], x3[26]); - x4[29] = _mm_adds_epi16(x3[29], x3[26]); - x4[27] = _mm_subs_epi16(x3[28], x3[27]); - x4[28] = _mm_adds_epi16(x3[28], x3[27]); - x4[32] = x3[32]; - x4[33] = x3[33]; - x4[34] = x3[34]; - x4[35] = x3[35]; - btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x3[36], x3[59], x4[36], x4[59]); - btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x3[37], x3[58], x4[37], x4[58]); - btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x3[38], x3[57], x4[38], x4[57]); - btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x3[39], x3[56], x4[39], x4[56]); - btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x3[40], x3[55], x4[40], x4[55]); - btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x3[41], x3[54], x4[41], x4[54]); - btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x3[42], x3[53], x4[42], x4[53]); - btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x3[43], x3[52], x4[43], x4[52]); - x4[44] 
= x3[44]; - x4[45] = x3[45]; - x4[46] = x3[46]; - x4[47] = x3[47]; - x4[48] = x3[48]; - x4[49] = x3[49]; - x4[50] = x3[50]; - x4[51] = x3[51]; - x4[60] = x3[60]; - x4[61] = x3[61]; - x4[62] = x3[62]; - x4[63] = x3[63]; - - // stage 5 - __m128i x5[64]; - x5[0] = _mm_adds_epi16(x4[0], x4[3]); - x5[3] = _mm_subs_epi16(x4[0], x4[3]); - x5[1] = _mm_adds_epi16(x4[1], x4[2]); - x5[2] = _mm_subs_epi16(x4[1], x4[2]); - x5[4] = x4[4]; - btf_16_sse2(cospi_m32_p32, cospi_p32_p32, x4[5], x4[6], x5[5], x5[6]); - x5[7] = x4[7]; - x5[8] = _mm_adds_epi16(x4[8], x4[11]); - x5[11] = _mm_subs_epi16(x4[8], x4[11]); - x5[9] = _mm_adds_epi16(x4[9], x4[10]); - x5[10] = _mm_subs_epi16(x4[9], x4[10]); - x5[12] = _mm_subs_epi16(x4[15], x4[12]); - x5[15] = _mm_adds_epi16(x4[15], x4[12]); - x5[13] = _mm_subs_epi16(x4[14], x4[13]); - x5[14] = _mm_adds_epi16(x4[14], x4[13]); - x5[16] = x4[16]; - x5[17] = x4[17]; - btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x4[18], x4[29], x5[18], x5[29]); - btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x4[19], x4[28], x5[19], x5[28]); - btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x4[20], x4[27], x5[20], x5[27]); - btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x4[21], x4[26], x5[21], x5[26]); - x5[22] = x4[22]; - x5[23] = x4[23]; - x5[24] = x4[24]; - x5[25] = x4[25]; - x5[30] = x4[30]; - x5[31] = x4[31]; - x5[32] = _mm_adds_epi16(x4[32], x4[39]); - x5[39] = _mm_subs_epi16(x4[32], x4[39]); - x5[33] = _mm_adds_epi16(x4[33], x4[38]); - x5[38] = _mm_subs_epi16(x4[33], x4[38]); - x5[34] = _mm_adds_epi16(x4[34], x4[37]); - x5[37] = _mm_subs_epi16(x4[34], x4[37]); - x5[35] = _mm_adds_epi16(x4[35], x4[36]); - x5[36] = _mm_subs_epi16(x4[35], x4[36]); - x5[40] = _mm_subs_epi16(x4[47], x4[40]); - x5[47] = _mm_adds_epi16(x4[47], x4[40]); - x5[41] = _mm_subs_epi16(x4[46], x4[41]); - x5[46] = _mm_adds_epi16(x4[46], x4[41]); - x5[42] = _mm_subs_epi16(x4[45], x4[42]); - x5[45] = _mm_adds_epi16(x4[45], x4[42]); - x5[43] = _mm_subs_epi16(x4[44], x4[43]); - x5[44] = _mm_adds_epi16(x4[44], 
x4[43]); - x5[48] = _mm_adds_epi16(x4[48], x4[55]); - x5[55] = _mm_subs_epi16(x4[48], x4[55]); - x5[49] = _mm_adds_epi16(x4[49], x4[54]); - x5[54] = _mm_subs_epi16(x4[49], x4[54]); - x5[50] = _mm_adds_epi16(x4[50], x4[53]); - x5[53] = _mm_subs_epi16(x4[50], x4[53]); - x5[51] = _mm_adds_epi16(x4[51], x4[52]); - x5[52] = _mm_subs_epi16(x4[51], x4[52]); - x5[56] = _mm_subs_epi16(x4[63], x4[56]); - x5[63] = _mm_adds_epi16(x4[63], x4[56]); - x5[57] = _mm_subs_epi16(x4[62], x4[57]); - x5[62] = _mm_adds_epi16(x4[62], x4[57]); - x5[58] = _mm_subs_epi16(x4[61], x4[58]); - x5[61] = _mm_adds_epi16(x4[61], x4[58]); - x5[59] = _mm_subs_epi16(x4[60], x4[59]); - x5[60] = _mm_adds_epi16(x4[60], x4[59]); - - // stage 6 - __m128i x6[64]; - btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x5[0], x5[1], x6[0], x6[1]); - btf_16_sse2(cospi_p48_p16, cospi_m16_p48, x5[2], x5[3], x6[2], x6[3]); - x6[4] = _mm_adds_epi16(x5[4], x5[5]); - x6[5] = _mm_subs_epi16(x5[4], x5[5]); - x6[6] = _mm_subs_epi16(x5[7], x5[6]); - x6[7] = _mm_adds_epi16(x5[7], x5[6]); - x6[8] = x5[8]; - btf_16_sse2(cospi_m16_p48, cospi_p48_p16, x5[9], x5[14], x6[9], x6[14]); - btf_16_sse2(cospi_m48_m16, cospi_m16_p48, x5[10], x5[13], x6[10], x6[13]); - x6[11] = x5[11]; - x6[12] = x5[12]; - x6[15] = x5[15]; - x6[16] = _mm_adds_epi16(x5[16], x5[19]); - x6[19] = _mm_subs_epi16(x5[16], x5[19]); - x6[17] = _mm_adds_epi16(x5[17], x5[18]); - x6[18] = _mm_subs_epi16(x5[17], x5[18]); - x6[20] = _mm_subs_epi16(x5[23], x5[20]); - x6[23] = _mm_adds_epi16(x5[23], x5[20]); - x6[21] = _mm_subs_epi16(x5[22], x5[21]); - x6[22] = _mm_adds_epi16(x5[22], x5[21]); - x6[24] = _mm_adds_epi16(x5[24], x5[27]); - x6[27] = _mm_subs_epi16(x5[24], x5[27]); - x6[25] = _mm_adds_epi16(x5[25], x5[26]); - x6[26] = _mm_subs_epi16(x5[25], x5[26]); - x6[28] = _mm_subs_epi16(x5[31], x5[28]); - x6[31] = _mm_adds_epi16(x5[31], x5[28]); - x6[29] = _mm_subs_epi16(x5[30], x5[29]); - x6[30] = _mm_adds_epi16(x5[30], x5[29]); - x6[32] = x5[32]; - x6[33] = x5[33]; - 
btf_16_sse2(cospi_m08_p56, cospi_p56_p08, x5[34], x5[61], x6[34], x6[61]); - btf_16_sse2(cospi_m08_p56, cospi_p56_p08, x5[35], x5[60], x6[35], x6[60]); - btf_16_sse2(cospi_m56_m08, cospi_m08_p56, x5[36], x5[59], x6[36], x6[59]); - btf_16_sse2(cospi_m56_m08, cospi_m08_p56, x5[37], x5[58], x6[37], x6[58]); - x6[38] = x5[38]; - x6[39] = x5[39]; - x6[40] = x5[40]; - x6[41] = x5[41]; - btf_16_sse2(cospi_m40_p24, cospi_p24_p40, x5[42], x5[53], x6[42], x6[53]); - btf_16_sse2(cospi_m40_p24, cospi_p24_p40, x5[43], x5[52], x6[43], x6[52]); - btf_16_sse2(cospi_m24_m40, cospi_m40_p24, x5[44], x5[51], x6[44], x6[51]); - btf_16_sse2(cospi_m24_m40, cospi_m40_p24, x5[45], x5[50], x6[45], x6[50]); - x6[46] = x5[46]; - x6[47] = x5[47]; - x6[48] = x5[48]; - x6[49] = x5[49]; - x6[54] = x5[54]; - x6[55] = x5[55]; - x6[56] = x5[56]; - x6[57] = x5[57]; - x6[62] = x5[62]; - x6[63] = x5[63]; - - // stage 7 - __m128i x7[64]; - x7[0] = x6[0]; - x7[1] = x6[1]; - x7[2] = x6[2]; - x7[3] = x6[3]; - btf_16_sse2(cospi_p56_p08, cospi_m08_p56, x6[4], x6[7], x7[4], x7[7]); - btf_16_sse2(cospi_p24_p40, cospi_m40_p24, x6[5], x6[6], x7[5], x7[6]); - x7[8] = _mm_adds_epi16(x6[8], x6[9]); - x7[9] = _mm_subs_epi16(x6[8], x6[9]); - x7[10] = _mm_subs_epi16(x6[11], x6[10]); - x7[11] = _mm_adds_epi16(x6[11], x6[10]); - x7[12] = _mm_adds_epi16(x6[12], x6[13]); - x7[13] = _mm_subs_epi16(x6[12], x6[13]); - x7[14] = _mm_subs_epi16(x6[15], x6[14]); - x7[15] = _mm_adds_epi16(x6[15], x6[14]); - x7[16] = x6[16]; - btf_16_sse2(cospi_m08_p56, cospi_p56_p08, x6[17], x6[30], x7[17], x7[30]); - btf_16_sse2(cospi_m56_m08, cospi_m08_p56, x6[18], x6[29], x7[18], x7[29]); - x7[19] = x6[19]; - x7[20] = x6[20]; - btf_16_sse2(cospi_m40_p24, cospi_p24_p40, x6[21], x6[26], x7[21], x7[26]); - btf_16_sse2(cospi_m24_m40, cospi_m40_p24, x6[22], x6[25], x7[22], x7[25]); - x7[23] = x6[23]; - x7[24] = x6[24]; - x7[27] = x6[27]; - x7[28] = x6[28]; - x7[31] = x6[31]; - x7[32] = _mm_adds_epi16(x6[32], x6[35]); - x7[35] = 
_mm_subs_epi16(x6[32], x6[35]); - x7[33] = _mm_adds_epi16(x6[33], x6[34]); - x7[34] = _mm_subs_epi16(x6[33], x6[34]); - x7[36] = _mm_subs_epi16(x6[39], x6[36]); - x7[39] = _mm_adds_epi16(x6[39], x6[36]); - x7[37] = _mm_subs_epi16(x6[38], x6[37]); - x7[38] = _mm_adds_epi16(x6[38], x6[37]); - x7[40] = _mm_adds_epi16(x6[40], x6[43]); - x7[43] = _mm_subs_epi16(x6[40], x6[43]); - x7[41] = _mm_adds_epi16(x6[41], x6[42]); - x7[42] = _mm_subs_epi16(x6[41], x6[42]); - x7[44] = _mm_subs_epi16(x6[47], x6[44]); - x7[47] = _mm_adds_epi16(x6[47], x6[44]); - x7[45] = _mm_subs_epi16(x6[46], x6[45]); - x7[46] = _mm_adds_epi16(x6[46], x6[45]); - x7[48] = _mm_adds_epi16(x6[48], x6[51]); - x7[51] = _mm_subs_epi16(x6[48], x6[51]); - x7[49] = _mm_adds_epi16(x6[49], x6[50]); - x7[50] = _mm_subs_epi16(x6[49], x6[50]); - x7[52] = _mm_subs_epi16(x6[55], x6[52]); - x7[55] = _mm_adds_epi16(x6[55], x6[52]); - x7[53] = _mm_subs_epi16(x6[54], x6[53]); - x7[54] = _mm_adds_epi16(x6[54], x6[53]); - x7[56] = _mm_adds_epi16(x6[56], x6[59]); - x7[59] = _mm_subs_epi16(x6[56], x6[59]); - x7[57] = _mm_adds_epi16(x6[57], x6[58]); - x7[58] = _mm_subs_epi16(x6[57], x6[58]); - x7[60] = _mm_subs_epi16(x6[63], x6[60]); - x7[63] = _mm_adds_epi16(x6[63], x6[60]); - x7[61] = _mm_subs_epi16(x6[62], x6[61]); - x7[62] = _mm_adds_epi16(x6[62], x6[61]); - - // stage 8 - __m128i x8[64]; - x8[0] = x7[0]; - x8[1] = x7[1]; - x8[2] = x7[2]; - x8[3] = x7[3]; - x8[4] = x7[4]; - x8[5] = x7[5]; - x8[6] = x7[6]; - x8[7] = x7[7]; - btf_16_sse2(cospi_p60_p04, cospi_m04_p60, x7[8], x7[15], x8[8], x8[15]); - btf_16_sse2(cospi_p28_p36, cospi_m36_p28, x7[9], x7[14], x8[9], x8[14]); - btf_16_sse2(cospi_p44_p20, cospi_m20_p44, x7[10], x7[13], x8[10], x8[13]); - btf_16_sse2(cospi_p12_p52, cospi_m52_p12, x7[11], x7[12], x8[11], x8[12]); - x8[16] = _mm_adds_epi16(x7[16], x7[17]); - x8[17] = _mm_subs_epi16(x7[16], x7[17]); - x8[18] = _mm_subs_epi16(x7[19], x7[18]); - x8[19] = _mm_adds_epi16(x7[19], x7[18]); - x8[20] = 
_mm_adds_epi16(x7[20], x7[21]); - x8[21] = _mm_subs_epi16(x7[20], x7[21]); - x8[22] = _mm_subs_epi16(x7[23], x7[22]); - x8[23] = _mm_adds_epi16(x7[23], x7[22]); - x8[24] = _mm_adds_epi16(x7[24], x7[25]); - x8[25] = _mm_subs_epi16(x7[24], x7[25]); - x8[26] = _mm_subs_epi16(x7[27], x7[26]); - x8[27] = _mm_adds_epi16(x7[27], x7[26]); - x8[28] = _mm_adds_epi16(x7[28], x7[29]); - x8[29] = _mm_subs_epi16(x7[28], x7[29]); - x8[30] = _mm_subs_epi16(x7[31], x7[30]); - x8[31] = _mm_adds_epi16(x7[31], x7[30]); - x8[32] = x7[32]; - btf_16_sse2(cospi_m04_p60, cospi_p60_p04, x7[33], x7[62], x8[33], x8[62]); - btf_16_sse2(cospi_m60_m04, cospi_m04_p60, x7[34], x7[61], x8[34], x8[61]); - x8[35] = x7[35]; - x8[36] = x7[36]; - btf_16_sse2(cospi_m36_p28, cospi_p28_p36, x7[37], x7[58], x8[37], x8[58]); - btf_16_sse2(cospi_m28_m36, cospi_m36_p28, x7[38], x7[57], x8[38], x8[57]); - x8[39] = x7[39]; - x8[40] = x7[40]; - btf_16_sse2(cospi_m20_p44, cospi_p44_p20, x7[41], x7[54], x8[41], x8[54]); - btf_16_sse2(cospi_m44_m20, cospi_m20_p44, x7[42], x7[53], x8[42], x8[53]); - x8[43] = x7[43]; - x8[44] = x7[44]; - btf_16_sse2(cospi_m52_p12, cospi_p12_p52, x7[45], x7[50], x8[45], x8[50]); - btf_16_sse2(cospi_m12_m52, cospi_m52_p12, x7[46], x7[49], x8[46], x8[49]); - x8[47] = x7[47]; - x8[48] = x7[48]; - x8[51] = x7[51]; - x8[52] = x7[52]; - x8[55] = x7[55]; - x8[56] = x7[56]; - x8[59] = x7[59]; - x8[60] = x7[60]; - x8[63] = x7[63]; - - // stage 9 - __m128i x9[64]; - x9[0] = x8[0]; - x9[1] = x8[1]; - x9[2] = x8[2]; - x9[3] = x8[3]; - x9[4] = x8[4]; - x9[5] = x8[5]; - x9[6] = x8[6]; - x9[7] = x8[7]; - x9[8] = x8[8]; - x9[9] = x8[9]; - x9[10] = x8[10]; - x9[11] = x8[11]; - x9[12] = x8[12]; - x9[13] = x8[13]; - x9[14] = x8[14]; - x9[15] = x8[15]; - btf_16_sse2(cospi_p62_p02, cospi_m02_p62, x8[16], x8[31], x9[16], x9[31]); - btf_16_sse2(cospi_p30_p34, cospi_m34_p30, x8[17], x8[30], x9[17], x9[30]); - btf_16_sse2(cospi_p46_p18, cospi_m18_p46, x8[18], x8[29], x9[18], x9[29]); - 
btf_16_sse2(cospi_p14_p50, cospi_m50_p14, x8[19], x8[28], x9[19], x9[28]); - btf_16_sse2(cospi_p54_p10, cospi_m10_p54, x8[20], x8[27], x9[20], x9[27]); - btf_16_sse2(cospi_p22_p42, cospi_m42_p22, x8[21], x8[26], x9[21], x9[26]); - btf_16_sse2(cospi_p38_p26, cospi_m26_p38, x8[22], x8[25], x9[22], x9[25]); - btf_16_sse2(cospi_p06_p58, cospi_m58_p06, x8[23], x8[24], x9[23], x9[24]); - x9[32] = _mm_adds_epi16(x8[32], x8[33]); - x9[33] = _mm_subs_epi16(x8[32], x8[33]); - x9[34] = _mm_subs_epi16(x8[35], x8[34]); - x9[35] = _mm_adds_epi16(x8[35], x8[34]); - x9[36] = _mm_adds_epi16(x8[36], x8[37]); - x9[37] = _mm_subs_epi16(x8[36], x8[37]); - x9[38] = _mm_subs_epi16(x8[39], x8[38]); - x9[39] = _mm_adds_epi16(x8[39], x8[38]); - x9[40] = _mm_adds_epi16(x8[40], x8[41]); - x9[41] = _mm_subs_epi16(x8[40], x8[41]); - x9[42] = _mm_subs_epi16(x8[43], x8[42]); - x9[43] = _mm_adds_epi16(x8[43], x8[42]); - x9[44] = _mm_adds_epi16(x8[44], x8[45]); - x9[45] = _mm_subs_epi16(x8[44], x8[45]); - x9[46] = _mm_subs_epi16(x8[47], x8[46]); - x9[47] = _mm_adds_epi16(x8[47], x8[46]); - x9[48] = _mm_adds_epi16(x8[48], x8[49]); - x9[49] = _mm_subs_epi16(x8[48], x8[49]); - x9[50] = _mm_subs_epi16(x8[51], x8[50]); - x9[51] = _mm_adds_epi16(x8[51], x8[50]); - x9[52] = _mm_adds_epi16(x8[52], x8[53]); - x9[53] = _mm_subs_epi16(x8[52], x8[53]); - x9[54] = _mm_subs_epi16(x8[55], x8[54]); - x9[55] = _mm_adds_epi16(x8[55], x8[54]); - x9[56] = _mm_adds_epi16(x8[56], x8[57]); - x9[57] = _mm_subs_epi16(x8[56], x8[57]); - x9[58] = _mm_subs_epi16(x8[59], x8[58]); - x9[59] = _mm_adds_epi16(x8[59], x8[58]); - x9[60] = _mm_adds_epi16(x8[60], x8[61]); - x9[61] = _mm_subs_epi16(x8[60], x8[61]); - x9[62] = _mm_subs_epi16(x8[63], x8[62]); - x9[63] = _mm_adds_epi16(x8[63], x8[62]); - - // stage 10 - __m128i x10[64]; - x10[0] = x9[0]; - x10[1] = x9[1]; - x10[2] = x9[2]; - x10[3] = x9[3]; - x10[4] = x9[4]; - x10[5] = x9[5]; - x10[6] = x9[6]; - x10[7] = x9[7]; - x10[8] = x9[8]; - x10[9] = x9[9]; - x10[10] = x9[10]; - 
x10[11] = x9[11]; - x10[12] = x9[12]; - x10[13] = x9[13]; - x10[14] = x9[14]; - x10[15] = x9[15]; - x10[16] = x9[16]; - x10[17] = x9[17]; - x10[18] = x9[18]; - x10[19] = x9[19]; - x10[20] = x9[20]; - x10[21] = x9[21]; - x10[22] = x9[22]; - x10[23] = x9[23]; - x10[24] = x9[24]; - x10[25] = x9[25]; - x10[26] = x9[26]; - x10[27] = x9[27]; - x10[28] = x9[28]; - x10[29] = x9[29]; - x10[30] = x9[30]; - x10[31] = x9[31]; - btf_16_sse2(cospi_p63_p01, cospi_m01_p63, x9[32], x9[63], x10[32], x10[63]); - btf_16_sse2(cospi_p31_p33, cospi_m33_p31, x9[33], x9[62], x10[33], x10[62]); - btf_16_sse2(cospi_p47_p17, cospi_m17_p47, x9[34], x9[61], x10[34], x10[61]); - btf_16_sse2(cospi_p15_p49, cospi_m49_p15, x9[35], x9[60], x10[35], x10[60]); - btf_16_sse2(cospi_p55_p09, cospi_m09_p55, x9[36], x9[59], x10[36], x10[59]); - btf_16_sse2(cospi_p23_p41, cospi_m41_p23, x9[37], x9[58], x10[37], x10[58]); - btf_16_sse2(cospi_p39_p25, cospi_m25_p39, x9[38], x9[57], x10[38], x10[57]); - btf_16_sse2(cospi_p07_p57, cospi_m57_p07, x9[39], x9[56], x10[39], x10[56]); - btf_16_sse2(cospi_p59_p05, cospi_m05_p59, x9[40], x9[55], x10[40], x10[55]); - btf_16_sse2(cospi_p27_p37, cospi_m37_p27, x9[41], x9[54], x10[41], x10[54]); - btf_16_sse2(cospi_p43_p21, cospi_m21_p43, x9[42], x9[53], x10[42], x10[53]); - btf_16_sse2(cospi_p11_p53, cospi_m53_p11, x9[43], x9[52], x10[43], x10[52]); - btf_16_sse2(cospi_p51_p13, cospi_m13_p51, x9[44], x9[51], x10[44], x10[51]); - btf_16_sse2(cospi_p19_p45, cospi_m45_p19, x9[45], x9[50], x10[45], x10[50]); - btf_16_sse2(cospi_p35_p29, cospi_m29_p35, x9[46], x9[49], x10[46], x10[49]); - btf_16_sse2(cospi_p03_p61, cospi_m61_p03, x9[47], x9[48], x10[47], x10[48]); - - // stage 11 - output[0] = x10[0]; - output[1] = x10[32]; - output[2] = x10[16]; - output[3] = x10[48]; - output[4] = x10[8]; - output[5] = x10[40]; - output[6] = x10[24]; - output[7] = x10[56]; - output[8] = x10[4]; - output[9] = x10[36]; - output[10] = x10[20]; - output[11] = x10[52]; - output[12] = x10[12]; - 
output[13] = x10[44]; - output[14] = x10[28]; - output[15] = x10[60]; - output[16] = x10[2]; - output[17] = x10[34]; - output[18] = x10[18]; - output[19] = x10[50]; - output[20] = x10[10]; - output[21] = x10[42]; - output[22] = x10[26]; - output[23] = x10[58]; - output[24] = x10[6]; - output[25] = x10[38]; - output[26] = x10[22]; - output[27] = x10[54]; - output[28] = x10[14]; - output[29] = x10[46]; - output[30] = x10[30]; - output[31] = x10[62]; - output[32] = x10[1]; - output[33] = x10[33]; - output[34] = x10[17]; - output[35] = x10[49]; - output[36] = x10[9]; - output[37] = x10[41]; - output[38] = x10[25]; - output[39] = x10[57]; - output[40] = x10[5]; - output[41] = x10[37]; - output[42] = x10[21]; - output[43] = x10[53]; - output[44] = x10[13]; - output[45] = x10[45]; - output[46] = x10[29]; - output[47] = x10[61]; - output[48] = x10[3]; - output[49] = x10[35]; - output[50] = x10[19]; - output[51] = x10[51]; - output[52] = x10[11]; - output[53] = x10[43]; - output[54] = x10[27]; - output[55] = x10[59]; - output[56] = x10[7]; - output[57] = x10[39]; - output[58] = x10[23]; - output[59] = x10[55]; - output[60] = x10[15]; - output[61] = x10[47]; - output[62] = x10[31]; - output[63] = x10[63]; -} - -static void fadst4x4_new_sse2(const __m128i *input, __m128i *output, - int8_t cos_bit) { - const int32_t *sinpi = sinpi_arr(cos_bit); - const __m128i sinpi_p01_p02 = pair_set_epi16(sinpi[1], sinpi[2]); - const __m128i sinpi_p04_m01 = pair_set_epi16(sinpi[4], -sinpi[1]); - const __m128i sinpi_p03_p04 = pair_set_epi16(sinpi[3], sinpi[4]); - const __m128i sinpi_m03_p02 = pair_set_epi16(-sinpi[3], sinpi[2]); - const __m128i sinpi_p03_p03 = _mm_set1_epi16((int16_t)sinpi[3]); - const __m128i __zero = _mm_set1_epi16(0); - const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1)); - const __m128i in7 = _mm_add_epi16(input[0], input[1]); - __m128i u[8], v[8]; - - u[0] = _mm_unpacklo_epi16(input[0], input[1]); - u[1] = _mm_unpacklo_epi16(input[2], input[3]); - u[2] = 
_mm_unpacklo_epi16(in7, __zero); - u[3] = _mm_unpacklo_epi16(input[2], __zero); - u[4] = _mm_unpacklo_epi16(input[3], __zero); - - v[0] = _mm_madd_epi16(u[0], sinpi_p01_p02); // s0 + s2 - v[1] = _mm_madd_epi16(u[1], sinpi_p03_p04); // s4 + s5 - v[2] = _mm_madd_epi16(u[2], sinpi_p03_p03); // x1 - v[3] = _mm_madd_epi16(u[0], sinpi_p04_m01); // s1 - s3 - v[4] = _mm_madd_epi16(u[1], sinpi_m03_p02); // -s4 + s6 - v[5] = _mm_madd_epi16(u[3], sinpi_p03_p03); // s4 - v[6] = _mm_madd_epi16(u[4], sinpi_p03_p03); - - u[0] = _mm_add_epi32(v[0], v[1]); - u[1] = _mm_sub_epi32(v[2], v[6]); - u[2] = _mm_add_epi32(v[3], v[4]); - u[3] = _mm_sub_epi32(u[2], u[0]); - u[4] = _mm_slli_epi32(v[5], 2); - u[5] = _mm_sub_epi32(u[4], v[5]); - u[6] = _mm_add_epi32(u[3], u[5]); - - v[0] = _mm_add_epi32(u[0], __rounding); - v[1] = _mm_add_epi32(u[1], __rounding); - v[2] = _mm_add_epi32(u[2], __rounding); - v[3] = _mm_add_epi32(u[6], __rounding); - - u[0] = _mm_srai_epi32(v[0], cos_bit); - u[1] = _mm_srai_epi32(v[1], cos_bit); - u[2] = _mm_srai_epi32(v[2], cos_bit); - u[3] = _mm_srai_epi32(v[3], cos_bit); - - output[0] = _mm_packs_epi32(u[0], u[2]); - output[1] = _mm_packs_epi32(u[1], u[3]); - output[2] = _mm_srli_si128(output[0], 8); - output[3] = _mm_srli_si128(output[1], 8); -} - -static void fadst4x8_new_sse2(const __m128i *input, __m128i *output, - int8_t cos_bit) { - const int32_t *cospi = cospi_arr(cos_bit); - const __m128i __zero = _mm_setzero_si128(); - const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1)); - - __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]); - __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]); - __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]); - __m128i cospi_m48_p16 = pair_set_epi16(-cospi[48], cospi[16]); - __m128i cospi_p04_p60 = pair_set_epi16(cospi[4], cospi[60]); - __m128i cospi_p60_m04 = pair_set_epi16(cospi[60], -cospi[4]); - __m128i cospi_p20_p44 = 
pair_set_epi16(cospi[20], cospi[44]); - __m128i cospi_p44_m20 = pair_set_epi16(cospi[44], -cospi[20]); - __m128i cospi_p36_p28 = pair_set_epi16(cospi[36], cospi[28]); - __m128i cospi_p28_m36 = pair_set_epi16(cospi[28], -cospi[36]); - __m128i cospi_p52_p12 = pair_set_epi16(cospi[52], cospi[12]); - __m128i cospi_p12_m52 = pair_set_epi16(cospi[12], -cospi[52]); - - // stage 1 - __m128i x1[8]; - x1[0] = input[0]; - x1[1] = _mm_subs_epi16(__zero, input[7]); - x1[2] = _mm_subs_epi16(__zero, input[3]); - x1[3] = input[4]; - x1[4] = _mm_subs_epi16(__zero, input[1]); - x1[5] = input[6]; - x1[6] = input[2]; - x1[7] = _mm_subs_epi16(__zero, input[5]); - - // stage 2 - __m128i x2[8]; - x2[0] = x1[0]; - x2[1] = x1[1]; - btf_16_w4_sse2(&cospi_p32_p32, &cospi_p32_m32, __rounding, cos_bit, &x1[2], - &x1[3], &x2[2], &x2[3]); - x2[4] = x1[4]; - x2[5] = x1[5]; - btf_16_w4_sse2(&cospi_p32_p32, &cospi_p32_m32, __rounding, cos_bit, &x1[6], - &x1[7], &x2[6], &x2[7]); - - // stage 3 - __m128i x3[8]; - x3[0] = _mm_adds_epi16(x2[0], x2[2]); - x3[2] = _mm_subs_epi16(x2[0], x2[2]); - x3[1] = _mm_adds_epi16(x2[1], x2[3]); - x3[3] = _mm_subs_epi16(x2[1], x2[3]); - x3[4] = _mm_adds_epi16(x2[4], x2[6]); - x3[6] = _mm_subs_epi16(x2[4], x2[6]); - x3[5] = _mm_adds_epi16(x2[5], x2[7]); - x3[7] = _mm_subs_epi16(x2[5], x2[7]); - - // stage 4 - __m128i x4[8]; - x4[0] = x3[0]; - x4[1] = x3[1]; - x4[2] = x3[2]; - x4[3] = x3[3]; - btf_16_w4_sse2(&cospi_p16_p48, &cospi_p48_m16, __rounding, cos_bit, &x3[4], - &x3[5], &x4[4], &x4[5]); - btf_16_w4_sse2(&cospi_m48_p16, &cospi_p16_p48, __rounding, cos_bit, &x3[6], - &x3[7], &x4[6], &x4[7]); - - // stage 5 - __m128i x5[8]; - x5[0] = _mm_adds_epi16(x4[0], x4[4]); - x5[4] = _mm_subs_epi16(x4[0], x4[4]); - x5[1] = _mm_adds_epi16(x4[1], x4[5]); - x5[5] = _mm_subs_epi16(x4[1], x4[5]); - x5[2] = _mm_adds_epi16(x4[2], x4[6]); - x5[6] = _mm_subs_epi16(x4[2], x4[6]); - x5[3] = _mm_adds_epi16(x4[3], x4[7]); - x5[7] = _mm_subs_epi16(x4[3], x4[7]); - - // stage 6 - __m128i 
x6[8]; - btf_16_w4_sse2(&cospi_p04_p60, &cospi_p60_m04, __rounding, cos_bit, &x5[0], - &x5[1], &x6[0], &x6[1]); - btf_16_w4_sse2(&cospi_p20_p44, &cospi_p44_m20, __rounding, cos_bit, &x5[2], - &x5[3], &x6[2], &x6[3]); - btf_16_w4_sse2(&cospi_p36_p28, &cospi_p28_m36, __rounding, cos_bit, &x5[4], - &x5[5], &x6[4], &x6[5]); - btf_16_w4_sse2(&cospi_p52_p12, &cospi_p12_m52, __rounding, cos_bit, &x5[6], - &x5[7], &x6[6], &x6[7]); - - // stage 7 - output[0] = x6[1]; - output[1] = x6[6]; - output[2] = x6[3]; - output[3] = x6[4]; - output[4] = x6[5]; - output[5] = x6[2]; - output[6] = x6[7]; - output[7] = x6[0]; -} - -static void fadst8x4_new_sse2(const __m128i *input, __m128i *output, - int8_t cos_bit) { - const int32_t *sinpi = sinpi_arr(cos_bit); - const __m128i sinpi_p01_p02 = pair_set_epi16(sinpi[1], sinpi[2]); - const __m128i sinpi_p04_m01 = pair_set_epi16(sinpi[4], -sinpi[1]); - const __m128i sinpi_p03_p04 = pair_set_epi16(sinpi[3], sinpi[4]); - const __m128i sinpi_m03_p02 = pair_set_epi16(-sinpi[3], sinpi[2]); - const __m128i sinpi_p03_p03 = _mm_set1_epi16((int16_t)sinpi[3]); - const __m128i __zero = _mm_set1_epi16(0); - const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1)); - const __m128i in7 = _mm_add_epi16(input[0], input[1]); - __m128i u_lo[8], u_hi[8], v_lo[8], v_hi[8]; - - u_lo[0] = _mm_unpacklo_epi16(input[0], input[1]); - u_hi[0] = _mm_unpackhi_epi16(input[0], input[1]); - u_lo[1] = _mm_unpacklo_epi16(input[2], input[3]); - u_hi[1] = _mm_unpackhi_epi16(input[2], input[3]); - u_lo[2] = _mm_unpacklo_epi16(in7, __zero); - u_hi[2] = _mm_unpackhi_epi16(in7, __zero); - u_lo[3] = _mm_unpacklo_epi16(input[2], __zero); - u_hi[3] = _mm_unpackhi_epi16(input[2], __zero); - u_lo[4] = _mm_unpacklo_epi16(input[3], __zero); - u_hi[4] = _mm_unpackhi_epi16(input[3], __zero); - - v_lo[0] = _mm_madd_epi16(u_lo[0], sinpi_p01_p02); // s0 + s2 - v_hi[0] = _mm_madd_epi16(u_hi[0], sinpi_p01_p02); // s0 + s2 - v_lo[1] = _mm_madd_epi16(u_lo[1], sinpi_p03_p04); // s4 + s5 - 
v_hi[1] = _mm_madd_epi16(u_hi[1], sinpi_p03_p04); // s4 + s5 - v_lo[2] = _mm_madd_epi16(u_lo[2], sinpi_p03_p03); // x1 - v_hi[2] = _mm_madd_epi16(u_hi[2], sinpi_p03_p03); // x1 - v_lo[3] = _mm_madd_epi16(u_lo[0], sinpi_p04_m01); // s1 - s3 - v_hi[3] = _mm_madd_epi16(u_hi[0], sinpi_p04_m01); // s1 - s3 - v_lo[4] = _mm_madd_epi16(u_lo[1], sinpi_m03_p02); // -s4 + s6 - v_hi[4] = _mm_madd_epi16(u_hi[1], sinpi_m03_p02); // -s4 + s6 - v_lo[5] = _mm_madd_epi16(u_lo[3], sinpi_p03_p03); // s4 - v_hi[5] = _mm_madd_epi16(u_hi[3], sinpi_p03_p03); // s4 - v_lo[6] = _mm_madd_epi16(u_lo[4], sinpi_p03_p03); - v_hi[6] = _mm_madd_epi16(u_hi[4], sinpi_p03_p03); - - u_lo[0] = _mm_add_epi32(v_lo[0], v_lo[1]); - u_hi[0] = _mm_add_epi32(v_hi[0], v_hi[1]); - u_lo[1] = _mm_sub_epi32(v_lo[2], v_lo[6]); - u_hi[1] = _mm_sub_epi32(v_hi[2], v_hi[6]); - u_lo[2] = _mm_add_epi32(v_lo[3], v_lo[4]); - u_hi[2] = _mm_add_epi32(v_hi[3], v_hi[4]); - u_lo[3] = _mm_sub_epi32(u_lo[2], u_lo[0]); - u_hi[3] = _mm_sub_epi32(u_hi[2], u_hi[0]); - u_lo[4] = _mm_slli_epi32(v_lo[5], 2); - u_hi[4] = _mm_slli_epi32(v_hi[5], 2); - u_lo[5] = _mm_sub_epi32(u_lo[4], v_lo[5]); - u_hi[5] = _mm_sub_epi32(u_hi[4], v_hi[5]); - u_lo[6] = _mm_add_epi32(u_lo[3], u_lo[5]); - u_hi[6] = _mm_add_epi32(u_hi[3], u_hi[5]); - - v_lo[0] = _mm_add_epi32(u_lo[0], __rounding); - v_hi[0] = _mm_add_epi32(u_hi[0], __rounding); - v_lo[1] = _mm_add_epi32(u_lo[1], __rounding); - v_hi[1] = _mm_add_epi32(u_hi[1], __rounding); - v_lo[2] = _mm_add_epi32(u_lo[2], __rounding); - v_hi[2] = _mm_add_epi32(u_hi[2], __rounding); - v_lo[3] = _mm_add_epi32(u_lo[6], __rounding); - v_hi[3] = _mm_add_epi32(u_hi[6], __rounding); - - u_lo[0] = _mm_srai_epi32(v_lo[0], cos_bit); - u_hi[0] = _mm_srai_epi32(v_hi[0], cos_bit); - u_lo[1] = _mm_srai_epi32(v_lo[1], cos_bit); - u_hi[1] = _mm_srai_epi32(v_hi[1], cos_bit); - u_lo[2] = _mm_srai_epi32(v_lo[2], cos_bit); - u_hi[2] = _mm_srai_epi32(v_hi[2], cos_bit); - u_lo[3] = _mm_srai_epi32(v_lo[3], cos_bit); - u_hi[3] = 
_mm_srai_epi32(v_hi[3], cos_bit); - - output[0] = _mm_packs_epi32(u_lo[0], u_hi[0]); - output[1] = _mm_packs_epi32(u_lo[1], u_hi[1]); - output[2] = _mm_packs_epi32(u_lo[2], u_hi[2]); - output[3] = _mm_packs_epi32(u_lo[3], u_hi[3]); -} - -static void fadst8x8_new_sse2(const __m128i *input, __m128i *output, - int8_t cos_bit) { - const int32_t *cospi = cospi_arr(cos_bit); - const __m128i __zero = _mm_setzero_si128(); - const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1)); - - __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]); - __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]); - __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]); - __m128i cospi_m48_p16 = pair_set_epi16(-cospi[48], cospi[16]); - __m128i cospi_p04_p60 = pair_set_epi16(cospi[4], cospi[60]); - __m128i cospi_p60_m04 = pair_set_epi16(cospi[60], -cospi[4]); - __m128i cospi_p20_p44 = pair_set_epi16(cospi[20], cospi[44]); - __m128i cospi_p44_m20 = pair_set_epi16(cospi[44], -cospi[20]); - __m128i cospi_p36_p28 = pair_set_epi16(cospi[36], cospi[28]); - __m128i cospi_p28_m36 = pair_set_epi16(cospi[28], -cospi[36]); - __m128i cospi_p52_p12 = pair_set_epi16(cospi[52], cospi[12]); - __m128i cospi_p12_m52 = pair_set_epi16(cospi[12], -cospi[52]); - - // stage 1 - __m128i x1[8]; - x1[0] = input[0]; - x1[1] = _mm_subs_epi16(__zero, input[7]); - x1[2] = _mm_subs_epi16(__zero, input[3]); - x1[3] = input[4]; - x1[4] = _mm_subs_epi16(__zero, input[1]); - x1[5] = input[6]; - x1[6] = input[2]; - x1[7] = _mm_subs_epi16(__zero, input[5]); - - // stage 2 - __m128i x2[8]; - x2[0] = x1[0]; - x2[1] = x1[1]; - btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x1[2], x1[3], x2[2], x2[3]); - x2[4] = x1[4]; - x2[5] = x1[5]; - btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x1[6], x1[7], x2[6], x2[7]); - - // stage 3 - __m128i x3[8]; - x3[0] = _mm_adds_epi16(x2[0], x2[2]); - x3[2] = _mm_subs_epi16(x2[0], x2[2]); - x3[1] = _mm_adds_epi16(x2[1], 
x2[3]); - x3[3] = _mm_subs_epi16(x2[1], x2[3]); - x3[4] = _mm_adds_epi16(x2[4], x2[6]); - x3[6] = _mm_subs_epi16(x2[4], x2[6]); - x3[5] = _mm_adds_epi16(x2[5], x2[7]); - x3[7] = _mm_subs_epi16(x2[5], x2[7]); - - // stage 4 - __m128i x4[8]; - x4[0] = x3[0]; - x4[1] = x3[1]; - x4[2] = x3[2]; - x4[3] = x3[3]; - btf_16_sse2(cospi_p16_p48, cospi_p48_m16, x3[4], x3[5], x4[4], x4[5]); - btf_16_sse2(cospi_m48_p16, cospi_p16_p48, x3[6], x3[7], x4[6], x4[7]); - - // stage 5 - __m128i x5[8]; - x5[0] = _mm_adds_epi16(x4[0], x4[4]); - x5[4] = _mm_subs_epi16(x4[0], x4[4]); - x5[1] = _mm_adds_epi16(x4[1], x4[5]); - x5[5] = _mm_subs_epi16(x4[1], x4[5]); - x5[2] = _mm_adds_epi16(x4[2], x4[6]); - x5[6] = _mm_subs_epi16(x4[2], x4[6]); - x5[3] = _mm_adds_epi16(x4[3], x4[7]); - x5[7] = _mm_subs_epi16(x4[3], x4[7]); - - // stage 6 - __m128i x6[8]; - btf_16_sse2(cospi_p04_p60, cospi_p60_m04, x5[0], x5[1], x6[0], x6[1]); - btf_16_sse2(cospi_p20_p44, cospi_p44_m20, x5[2], x5[3], x6[2], x6[3]); - btf_16_sse2(cospi_p36_p28, cospi_p28_m36, x5[4], x5[5], x6[4], x6[5]); - btf_16_sse2(cospi_p52_p12, cospi_p12_m52, x5[6], x5[7], x6[6], x6[7]); - - // stage 7 - output[0] = x6[1]; - output[1] = x6[6]; - output[2] = x6[3]; - output[3] = x6[4]; - output[4] = x6[5]; - output[5] = x6[2]; - output[6] = x6[7]; - output[7] = x6[0]; -} - -static void fadst8x16_new_sse2(const __m128i *input, __m128i *output, - int8_t cos_bit) { - const int32_t *cospi = cospi_arr(cos_bit); - const __m128i __zero = _mm_setzero_si128(); - const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1)); - - __m128i cospi_p32_p32 = pair_set_epi16(cospi[32], cospi[32]); - __m128i cospi_p32_m32 = pair_set_epi16(cospi[32], -cospi[32]); - __m128i cospi_p16_p48 = pair_set_epi16(cospi[16], cospi[48]); - __m128i cospi_p48_m16 = pair_set_epi16(cospi[48], -cospi[16]); - __m128i cospi_m48_p16 = pair_set_epi16(-cospi[48], cospi[16]); - __m128i cospi_p08_p56 = pair_set_epi16(cospi[8], cospi[56]); - __m128i cospi_p56_m08 = 
pair_set_epi16(cospi[56], -cospi[8]); - __m128i cospi_p40_p24 = pair_set_epi16(cospi[40], cospi[24]); - __m128i cospi_p24_m40 = pair_set_epi16(cospi[24], -cospi[40]); - __m128i cospi_m56_p08 = pair_set_epi16(-cospi[56], cospi[8]); - __m128i cospi_m24_p40 = pair_set_epi16(-cospi[24], cospi[40]); - __m128i cospi_p02_p62 = pair_set_epi16(cospi[2], cospi[62]); - __m128i cospi_p62_m02 = pair_set_epi16(cospi[62], -cospi[2]); - __m128i cospi_p10_p54 = pair_set_epi16(cospi[10], cospi[54]); - __m128i cospi_p54_m10 = pair_set_epi16(cospi[54], -cospi[10]); - __m128i cospi_p18_p46 = pair_set_epi16(cospi[18], cospi[46]); - __m128i cospi_p46_m18 = pair_set_epi16(cospi[46], -cospi[18]); - __m128i cospi_p26_p38 = pair_set_epi16(cospi[26], cospi[38]); - __m128i cospi_p38_m26 = pair_set_epi16(cospi[38], -cospi[26]); - __m128i cospi_p34_p30 = pair_set_epi16(cospi[34], cospi[30]); - __m128i cospi_p30_m34 = pair_set_epi16(cospi[30], -cospi[34]); - __m128i cospi_p42_p22 = pair_set_epi16(cospi[42], cospi[22]); - __m128i cospi_p22_m42 = pair_set_epi16(cospi[22], -cospi[42]); - __m128i cospi_p50_p14 = pair_set_epi16(cospi[50], cospi[14]); - __m128i cospi_p14_m50 = pair_set_epi16(cospi[14], -cospi[50]); - __m128i cospi_p58_p06 = pair_set_epi16(cospi[58], cospi[6]); - __m128i cospi_p06_m58 = pair_set_epi16(cospi[6], -cospi[58]); - - // stage 1 - __m128i x1[16]; - x1[0] = input[0]; - x1[1] = _mm_subs_epi16(__zero, input[15]); - x1[2] = _mm_subs_epi16(__zero, input[7]); - x1[3] = input[8]; - x1[4] = _mm_subs_epi16(__zero, input[3]); - x1[5] = input[12]; - x1[6] = input[4]; - x1[7] = _mm_subs_epi16(__zero, input[11]); - x1[8] = _mm_subs_epi16(__zero, input[1]); - x1[9] = input[14]; - x1[10] = input[6]; - x1[11] = _mm_subs_epi16(__zero, input[9]); - x1[12] = input[2]; - x1[13] = _mm_subs_epi16(__zero, input[13]); - x1[14] = _mm_subs_epi16(__zero, input[5]); - x1[15] = input[10]; - - // stage 2 - __m128i x2[16]; - x2[0] = x1[0]; - x2[1] = x1[1]; - btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x1[2], 
x1[3], x2[2], x2[3]); - x2[4] = x1[4]; - x2[5] = x1[5]; - btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x1[6], x1[7], x2[6], x2[7]); - x2[8] = x1[8]; - x2[9] = x1[9]; - btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x1[10], x1[11], x2[10], x2[11]); - x2[12] = x1[12]; - x2[13] = x1[13]; - btf_16_sse2(cospi_p32_p32, cospi_p32_m32, x1[14], x1[15], x2[14], x2[15]); - - // stage 3 - __m128i x3[16]; - x3[0] = _mm_adds_epi16(x2[0], x2[2]); - x3[2] = _mm_subs_epi16(x2[0], x2[2]); - x3[1] = _mm_adds_epi16(x2[1], x2[3]); - x3[3] = _mm_subs_epi16(x2[1], x2[3]); - x3[4] = _mm_adds_epi16(x2[4], x2[6]); - x3[6] = _mm_subs_epi16(x2[4], x2[6]); - x3[5] = _mm_adds_epi16(x2[5], x2[7]); - x3[7] = _mm_subs_epi16(x2[5], x2[7]); - x3[8] = _mm_adds_epi16(x2[8], x2[10]); - x3[10] = _mm_subs_epi16(x2[8], x2[10]); - x3[9] = _mm_adds_epi16(x2[9], x2[11]); - x3[11] = _mm_subs_epi16(x2[9], x2[11]); - x3[12] = _mm_adds_epi16(x2[12], x2[14]); - x3[14] = _mm_subs_epi16(x2[12], x2[14]); - x3[13] = _mm_adds_epi16(x2[13], x2[15]); - x3[15] = _mm_subs_epi16(x2[13], x2[15]); - - // stage 4 - __m128i x4[16]; - x4[0] = x3[0]; - x4[1] = x3[1]; - x4[2] = x3[2]; - x4[3] = x3[3]; - btf_16_sse2(cospi_p16_p48, cospi_p48_m16, x3[4], x3[5], x4[4], x4[5]); - btf_16_sse2(cospi_m48_p16, cospi_p16_p48, x3[6], x3[7], x4[6], x4[7]); - x4[8] = x3[8]; - x4[9] = x3[9]; - x4[10] = x3[10]; - x4[11] = x3[11]; - btf_16_sse2(cospi_p16_p48, cospi_p48_m16, x3[12], x3[13], x4[12], x4[13]); - btf_16_sse2(cospi_m48_p16, cospi_p16_p48, x3[14], x3[15], x4[14], x4[15]); - - // stage 5 - __m128i x5[16]; - x5[0] = _mm_adds_epi16(x4[0], x4[4]); - x5[4] = _mm_subs_epi16(x4[0], x4[4]); - x5[1] = _mm_adds_epi16(x4[1], x4[5]); - x5[5] = _mm_subs_epi16(x4[1], x4[5]); - x5[2] = _mm_adds_epi16(x4[2], x4[6]); - x5[6] = _mm_subs_epi16(x4[2], x4[6]); - x5[3] = _mm_adds_epi16(x4[3], x4[7]); - x5[7] = _mm_subs_epi16(x4[3], x4[7]); - x5[8] = _mm_adds_epi16(x4[8], x4[12]); - x5[12] = _mm_subs_epi16(x4[8], x4[12]); - x5[9] = _mm_adds_epi16(x4[9], x4[13]); - 
x5[13] = _mm_subs_epi16(x4[9], x4[13]); - x5[10] = _mm_adds_epi16(x4[10], x4[14]); - x5[14] = _mm_subs_epi16(x4[10], x4[14]); - x5[11] = _mm_adds_epi16(x4[11], x4[15]); - x5[15] = _mm_subs_epi16(x4[11], x4[15]); - - // stage 6 - __m128i x6[16]; - x6[0] = x5[0]; - x6[1] = x5[1]; - x6[2] = x5[2]; - x6[3] = x5[3]; - x6[4] = x5[4]; - x6[5] = x5[5]; - x6[6] = x5[6]; - x6[7] = x5[7]; - btf_16_sse2(cospi_p08_p56, cospi_p56_m08, x5[8], x5[9], x6[8], x6[9]); - btf_16_sse2(cospi_p40_p24, cospi_p24_m40, x5[10], x5[11], x6[10], x6[11]); - btf_16_sse2(cospi_m56_p08, cospi_p08_p56, x5[12], x5[13], x6[12], x6[13]); - btf_16_sse2(cospi_m24_p40, cospi_p40_p24, x5[14], x5[15], x6[14], x6[15]); - - // stage 7 - __m128i x7[16]; - x7[0] = _mm_adds_epi16(x6[0], x6[8]); - x7[8] = _mm_subs_epi16(x6[0], x6[8]); - x7[1] = _mm_adds_epi16(x6[1], x6[9]); - x7[9] = _mm_subs_epi16(x6[1], x6[9]); - x7[2] = _mm_adds_epi16(x6[2], x6[10]); - x7[10] = _mm_subs_epi16(x6[2], x6[10]); - x7[3] = _mm_adds_epi16(x6[3], x6[11]); - x7[11] = _mm_subs_epi16(x6[3], x6[11]); - x7[4] = _mm_adds_epi16(x6[4], x6[12]); - x7[12] = _mm_subs_epi16(x6[4], x6[12]); - x7[5] = _mm_adds_epi16(x6[5], x6[13]); - x7[13] = _mm_subs_epi16(x6[5], x6[13]); - x7[6] = _mm_adds_epi16(x6[6], x6[14]); - x7[14] = _mm_subs_epi16(x6[6], x6[14]); - x7[7] = _mm_adds_epi16(x6[7], x6[15]); - x7[15] = _mm_subs_epi16(x6[7], x6[15]); - - // stage 8 - __m128i x8[16]; - btf_16_sse2(cospi_p02_p62, cospi_p62_m02, x7[0], x7[1], x8[0], x8[1]); - btf_16_sse2(cospi_p10_p54, cospi_p54_m10, x7[2], x7[3], x8[2], x8[3]); - btf_16_sse2(cospi_p18_p46, cospi_p46_m18, x7[4], x7[5], x8[4], x8[5]); - btf_16_sse2(cospi_p26_p38, cospi_p38_m26, x7[6], x7[7], x8[6], x8[7]); - btf_16_sse2(cospi_p34_p30, cospi_p30_m34, x7[8], x7[9], x8[8], x8[9]); - btf_16_sse2(cospi_p42_p22, cospi_p22_m42, x7[10], x7[11], x8[10], x8[11]); - btf_16_sse2(cospi_p50_p14, cospi_p14_m50, x7[12], x7[13], x8[12], x8[13]); - btf_16_sse2(cospi_p58_p06, cospi_p06_m58, x7[14], x7[15], x8[14], 
x8[15]); - - // stage 9 - output[0] = x8[1]; - output[1] = x8[14]; - output[2] = x8[3]; - output[3] = x8[12]; - output[4] = x8[5]; - output[5] = x8[10]; - output[6] = x8[7]; - output[7] = x8[8]; - output[8] = x8[9]; - output[9] = x8[6]; - output[10] = x8[11]; - output[11] = x8[4]; - output[12] = x8[13]; - output[13] = x8[2]; - output[14] = x8[15]; - output[15] = x8[0]; -} - -static const transform_1d_sse2 col_txfm4x4_arr[TX_TYPES] = { - fdct4x4_new_sse2, // DCT_DCT - fadst4x4_new_sse2, // ADST_DCT - fdct4x4_new_sse2, // DCT_ADST - fadst4x4_new_sse2, // ADST_ADST - fadst4x4_new_sse2, // FLIPADST_DCT - fdct4x4_new_sse2, // DCT_FLIPADST - fadst4x4_new_sse2, // FLIPADST_FLIPADST - fadst4x4_new_sse2, // ADST_FLIPADST - fadst4x4_new_sse2, // FLIPADST_ADST - fidentity4x4_new_sse2, // IDTX - fdct4x4_new_sse2, // V_DCT - fidentity4x4_new_sse2, // H_DCT - fadst4x4_new_sse2, // V_ADST - fidentity4x4_new_sse2, // H_ADST - fadst4x4_new_sse2, // V_FLIPADST - fidentity4x4_new_sse2 // H_FLIPADST -}; - -static const transform_1d_sse2 row_txfm4x4_arr[TX_TYPES] = { - fdct4x4_new_sse2, // DCT_DCT - fdct4x4_new_sse2, // ADST_DCT - fadst4x4_new_sse2, // DCT_ADST - fadst4x4_new_sse2, // ADST_ADST - fdct4x4_new_sse2, // FLIPADST_DCT - fadst4x4_new_sse2, // DCT_FLIPADST - fadst4x4_new_sse2, // FLIPADST_FLIPADST - fadst4x4_new_sse2, // ADST_FLIPADST - fadst4x4_new_sse2, // FLIPADST_ADST - fidentity4x4_new_sse2, // IDTX - fidentity4x4_new_sse2, // V_DCT - fdct4x4_new_sse2, // H_DCT - fidentity4x4_new_sse2, // V_ADST - fadst4x4_new_sse2, // H_ADST - fidentity4x4_new_sse2, // V_FLIPADST - fadst4x4_new_sse2 // H_FLIPADST -}; - -static const transform_1d_sse2 col_txfm4x8_arr[TX_TYPES] = { - fdct4x8_new_sse2, // DCT_DCT - fadst4x8_new_sse2, // ADST_DCT - fdct4x8_new_sse2, // DCT_ADST - fadst4x8_new_sse2, // ADST_ADST - fadst4x8_new_sse2, // FLIPADST_DCT - fdct4x8_new_sse2, // DCT_FLIPADST - fadst4x8_new_sse2, // FLIPADST_FLIPADST - fadst4x8_new_sse2, // ADST_FLIPADST - fadst4x8_new_sse2, // 
FLIPADST_ADST - fidentity8x8_new_sse2, // IDTX - fdct4x8_new_sse2, // V_DCT - fidentity8x8_new_sse2, // H_DCT - fadst4x8_new_sse2, // V_ADST - fidentity8x8_new_sse2, // H_ADST - fadst4x8_new_sse2, // V_FLIPADST - fidentity8x8_new_sse2 // H_FLIPADST -}; - -static const transform_1d_sse2 row_txfm8x4_arr[TX_TYPES] = { - fdct8x4_new_sse2, // DCT_DCT - fdct8x4_new_sse2, // ADST_DCT - fadst8x4_new_sse2, // DCT_ADST - fadst8x4_new_sse2, // ADST_ADST - fdct8x4_new_sse2, // FLIPADST_DCT - fadst8x4_new_sse2, // DCT_FLIPADST - fadst8x4_new_sse2, // FLIPADST_FLIPADST - fadst8x4_new_sse2, // ADST_FLIPADST - fadst8x4_new_sse2, // FLIPADST_ADST - fidentity8x4_new_sse2, // IDTX - fidentity8x4_new_sse2, // V_DCT - fdct8x4_new_sse2, // H_DCT - fidentity8x4_new_sse2, // V_ADST - fadst8x4_new_sse2, // H_ADST - fidentity8x4_new_sse2, // V_FLIPADST - fadst8x4_new_sse2 // H_FLIPADST -}; - -static const transform_1d_sse2 col_txfm8x4_arr[TX_TYPES] = { - fdct8x4_new_sse2, // DCT_DCT - fadst8x4_new_sse2, // ADST_DCT - fdct8x4_new_sse2, // DCT_ADST - fadst8x4_new_sse2, // ADST_ADST - fadst8x4_new_sse2, // FLIPADST_DCT - fdct8x4_new_sse2, // DCT_FLIPADST - fadst8x4_new_sse2, // FLIPADST_FLIPADST - fadst8x4_new_sse2, // ADST_FLIPADST - fadst8x4_new_sse2, // FLIPADST_ADST - fidentity8x4_new_sse2, // IDTX - fdct8x4_new_sse2, // V_DCT - fidentity8x4_new_sse2, // H_DCT - fadst8x4_new_sse2, // V_ADST - fidentity8x4_new_sse2, // H_ADST - fadst8x4_new_sse2, // V_FLIPADST - fidentity8x4_new_sse2 // H_FLIPADST -}; - -static const transform_1d_sse2 row_txfm4x8_arr[TX_TYPES] = { - fdct4x8_new_sse2, // DCT_DCT - fdct4x8_new_sse2, // ADST_DCT - fadst4x8_new_sse2, // DCT_ADST - fadst4x8_new_sse2, // ADST_ADST - fdct4x8_new_sse2, // FLIPADST_DCT - fadst4x8_new_sse2, // DCT_FLIPADST - fadst4x8_new_sse2, // FLIPADST_FLIPADST - fadst4x8_new_sse2, // ADST_FLIPADST - fadst4x8_new_sse2, // FLIPADST_ADST - fidentity8x8_new_sse2, // IDTX - fidentity8x8_new_sse2, // V_DCT - fdct4x8_new_sse2, // H_DCT - 
fidentity8x8_new_sse2, // V_ADST - fadst4x8_new_sse2, // H_ADST - fidentity8x8_new_sse2, // V_FLIPADST - fadst4x8_new_sse2 // H_FLIPADST -}; - -static const transform_1d_sse2 col_txfm8x8_arr[TX_TYPES] = { - fdct8x8_new_sse2, // DCT_DCT - fadst8x8_new_sse2, // ADST_DCT - fdct8x8_new_sse2, // DCT_ADST - fadst8x8_new_sse2, // ADST_ADST - fadst8x8_new_sse2, // FLIPADST_DCT - fdct8x8_new_sse2, // DCT_FLIPADST - fadst8x8_new_sse2, // FLIPADST_FLIPADST - fadst8x8_new_sse2, // ADST_FLIPADST - fadst8x8_new_sse2, // FLIPADST_ADST - fidentity8x8_new_sse2, // IDTX - fdct8x8_new_sse2, // V_DCT - fidentity8x8_new_sse2, // H_DCT - fadst8x8_new_sse2, // V_ADST - fidentity8x8_new_sse2, // H_ADST - fadst8x8_new_sse2, // V_FLIPADST - fidentity8x8_new_sse2, // H_FLIPADST -}; - -static const transform_1d_sse2 row_txfm8x8_arr[TX_TYPES] = { - fdct8x8_new_sse2, // DCT_DCT - fdct8x8_new_sse2, // ADST_DCT - fadst8x8_new_sse2, // DCT_ADST - fadst8x8_new_sse2, // ADST_ADST - fdct8x8_new_sse2, // FLIPADST_DCT - fadst8x8_new_sse2, // DCT_FLIPADST - fadst8x8_new_sse2, // FLIPADST_FLIPADST - fadst8x8_new_sse2, // ADST_FLIPADST - fadst8x8_new_sse2, // FLIPADST_ADST - fidentity8x8_new_sse2, // IDTX - fidentity8x8_new_sse2, // V_DCT - fdct8x8_new_sse2, // H_DCT - fidentity8x8_new_sse2, // V_ADST - fadst8x8_new_sse2, // H_ADST - fidentity8x8_new_sse2, // V_FLIPADST - fadst8x8_new_sse2 // H_FLIPADST -}; - -static const transform_1d_sse2 col_txfm8x16_arr[TX_TYPES] = { - fdct8x16_new_sse2, // DCT_DCT - fadst8x16_new_sse2, // ADST_DCT - fdct8x16_new_sse2, // DCT_ADST - fadst8x16_new_sse2, // ADST_ADST - fadst8x16_new_sse2, // FLIPADST_DCT - fdct8x16_new_sse2, // DCT_FLIPADST - fadst8x16_new_sse2, // FLIPADST_FLIPADST - fadst8x16_new_sse2, // ADST_FLIPADST - fadst8x16_new_sse2, // FLIPADST_ADST - fidentity8x16_new_sse2, // IDTX - fdct8x16_new_sse2, // V_DCT - fidentity8x16_new_sse2, // H_DCT - fadst8x16_new_sse2, // V_ADST - fidentity8x16_new_sse2, // H_ADST - fadst8x16_new_sse2, // V_FLIPADST - 
fidentity8x16_new_sse2 // H_FLIPADST -}; - -static const transform_1d_sse2 row_txfm8x16_arr[TX_TYPES] = { - fdct8x16_new_sse2, // DCT_DCT - fdct8x16_new_sse2, // ADST_DCT - fadst8x16_new_sse2, // DCT_ADST - fadst8x16_new_sse2, // ADST_ADST - fdct8x16_new_sse2, // FLIPADST_DCT - fadst8x16_new_sse2, // DCT_FLIPADST - fadst8x16_new_sse2, // FLIPADST_FLIPADST - fadst8x16_new_sse2, // ADST_FLIPADST - fadst8x16_new_sse2, // FLIPADST_ADST - fidentity8x16_new_sse2, // IDTX - fidentity8x16_new_sse2, // V_DCT - fdct8x16_new_sse2, // H_DCT - fidentity8x16_new_sse2, // V_ADST - fadst8x16_new_sse2, // H_ADST - fidentity8x16_new_sse2, // V_FLIPADST - fadst8x16_new_sse2 // H_FLIPADST -}; - -static const transform_1d_sse2 row_txfm8x32_arr[TX_TYPES] = { - fdct8x32_new_sse2, // DCT_DCT - NULL, // ADST_DCT - NULL, // DCT_ADST - NULL, // ADST_ADST - NULL, // FLIPADST_DCT - NULL, // DCT_FLIPADST - NULL, // FLIPADST_FLIPADST - NULL, // ADST_FLIPADST - NULL, // FLIPADST_ADST - fidentity8x32_new_sse2, // IDTX - fidentity8x32_new_sse2, // V_DCT - fdct8x32_new_sse2, // H_DCT - NULL, // V_ADST - NULL, // H_ADST - NULL, // V_FLIPADST - NULL // H_FLIPADST -}; - -void av1_lowbd_fwd_txfm2d_4x4_sse2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd) { - (void)bd; - __m128i buf0[4], buf1[4], *buf; - const int8_t *shift = fwd_txfm_shift_ls[TX_4X4]; - const int txw_idx = get_txw_idx(TX_4X4); - const int txh_idx = get_txh_idx(TX_4X4); - const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx]; - const int width = 4; - const int height = 4; - const transform_1d_sse2 col_txfm = col_txfm4x4_arr[tx_type]; - const transform_1d_sse2 row_txfm = row_txfm4x4_arr[tx_type]; - int ud_flip, lr_flip; - - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - if (ud_flip) { - load_buffer_16bit_to_16bit_w4_flip(input, stride, buf0, height); - } else { - load_buffer_16bit_to_16bit_w4(input, stride, buf0, height); - } - 
round_shift_16bit(buf0, height, shift[0]); - col_txfm(buf0, buf0, cos_bit_col); - round_shift_16bit(buf0, height, shift[1]); - transpose_16bit_4x4(buf0, buf1); - - if (lr_flip) { - buf = buf0; - flip_buf_sse2(buf1, buf, width); - } else { - buf = buf1; - } - row_txfm(buf, buf, cos_bit_row); - round_shift_16bit(buf, width, shift[2]); - transpose_16bit_4x4(buf, buf); - store_buffer_16bit_to_32bit_w4(buf, output, width, height); -} - -void av1_lowbd_fwd_txfm2d_4x8_sse2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd) { - (void)stride; - (void)bd; - __m128i buf0[8], buf1[8], *buf; - const int8_t *shift = fwd_txfm_shift_ls[TX_4X8]; - const int txw_idx = get_txw_idx(TX_4X8); - const int txh_idx = get_txh_idx(TX_4X8); - const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx]; - const int width = 4; - const int height = 8; - const transform_1d_sse2 col_txfm = col_txfm4x8_arr[tx_type]; - const transform_1d_sse2 row_txfm = row_txfm8x4_arr[tx_type]; - int ud_flip, lr_flip; - - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - if (ud_flip) { - load_buffer_16bit_to_16bit_w4_flip(input, stride, buf0, height); - } else { - load_buffer_16bit_to_16bit_w4(input, stride, buf0, height); - } - round_shift_16bit(buf0, height, shift[0]); - col_txfm(buf0, buf0, cos_bit_col); - round_shift_16bit(buf0, height, shift[1]); - transpose_16bit_4x8(buf0, buf1); - - if (lr_flip) { - buf = buf0; - flip_buf_sse2(buf1, buf, width); - } else { - buf = buf1; - } - row_txfm(buf, buf, cos_bit_row); - round_shift_16bit(buf, width, shift[2]); - transpose_16bit_8x4(buf, buf); - store_rect_buffer_16bit_to_32bit_w4(buf, output, width, height); -} - -void av1_lowbd_fwd_txfm2d_4x16_sse2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd) { - (void)bd; - __m128i buf0[16], buf1[16]; - const int8_t *shift = fwd_txfm_shift_ls[TX_4X16]; - const int txw_idx = get_txw_idx(TX_4X16); - const int txh_idx = 
get_txh_idx(TX_4X16); - const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx]; - const int width = 4; - const int height = 16; - const transform_1d_sse2 col_txfm = col_txfm8x16_arr[tx_type]; - const transform_1d_sse2 row_txfm = row_txfm8x4_arr[tx_type]; - int ud_flip, lr_flip; - - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - if (ud_flip) { - load_buffer_16bit_to_16bit_w4_flip(input, stride, buf0, height); - } else { - load_buffer_16bit_to_16bit_w4(input, stride, buf0, height); - } - round_shift_16bit(buf0, height, shift[0]); - col_txfm(buf0, buf0, cos_bit_col); - round_shift_16bit(buf0, height, shift[1]); - transpose_16bit_4x8(buf0, buf1); - transpose_16bit_4x8(buf0 + 8, buf1 + 8); - - for (int i = 0; i < 2; i++) { - __m128i *buf; - if (lr_flip) { - buf = buf0; - flip_buf_sse2(buf1 + 8 * i, buf, width); - } else { - buf = buf1 + 8 * i; - } - row_txfm(buf, buf, cos_bit_row); - round_shift_16bit(buf, width, shift[2]); - transpose_16bit_8x4(buf, buf); - store_buffer_16bit_to_32bit_w4(buf, output + 8 * width * i, width, 8); - } -} - -void av1_lowbd_fwd_txfm2d_8x4_sse2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd) { - (void)bd; - __m128i buf0[8], buf1[8], *buf; - const int8_t *shift = fwd_txfm_shift_ls[TX_8X4]; - const int txw_idx = get_txw_idx(TX_8X4); - const int txh_idx = get_txh_idx(TX_8X4); - const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx]; - const int width = 8; - const int height = 4; - const transform_1d_sse2 col_txfm = col_txfm8x4_arr[tx_type]; - const transform_1d_sse2 row_txfm = row_txfm4x8_arr[tx_type]; - int ud_flip, lr_flip; - - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - if (ud_flip) - load_buffer_16bit_to_16bit_flip(input, stride, buf0, height); - else - load_buffer_16bit_to_16bit(input, stride, buf0, height); - round_shift_16bit(buf0, height, shift[0]); - col_txfm(buf0, buf0, 
cos_bit_col); - round_shift_16bit(buf0, height, shift[1]); - transpose_16bit_8x8(buf0, buf1); - - if (lr_flip) { - buf = buf0; - flip_buf_sse2(buf1, buf, width); - } else { - buf = buf1; - } - row_txfm(buf, buf, cos_bit_row); - round_shift_16bit(buf, width, shift[2]); - transpose_16bit_8x8(buf, buf); - store_rect_buffer_16bit_to_32bit_w8(buf, output, width, height); -} - -void av1_lowbd_fwd_txfm2d_8x8_sse2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd) { - (void)bd; - __m128i buf0[8], buf1[8], *buf; - const int8_t *shift = fwd_txfm_shift_ls[TX_8X8]; - const int txw_idx = get_txw_idx(TX_8X8); - const int txh_idx = get_txh_idx(TX_8X8); - const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx]; - const int width = 8; - const int height = 8; - const transform_1d_sse2 col_txfm = col_txfm8x8_arr[tx_type]; - const transform_1d_sse2 row_txfm = row_txfm8x8_arr[tx_type]; - int ud_flip, lr_flip; - - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - if (ud_flip) - load_buffer_16bit_to_16bit_flip(input, stride, buf0, height); - else - load_buffer_16bit_to_16bit(input, stride, buf0, height); - round_shift_16bit(buf0, height, shift[0]); - col_txfm(buf0, buf0, cos_bit_col); - round_shift_16bit(buf0, height, shift[1]); - transpose_16bit_8x8(buf0, buf1); - - if (lr_flip) { - buf = buf0; - flip_buf_sse2(buf1, buf, width); - } else { - buf = buf1; - } - row_txfm(buf, buf, cos_bit_row); - round_shift_16bit(buf, width, shift[2]); - transpose_16bit_8x8(buf, buf); - store_buffer_16bit_to_32bit_w8(buf, output, width, height); -} - -void av1_lowbd_fwd_txfm2d_8x16_sse2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd) { - (void)bd; - __m128i buf0[16], buf1[16]; - const int8_t *shift = fwd_txfm_shift_ls[TX_8X16]; - const int txw_idx = get_txw_idx(TX_8X16); - const int txh_idx = get_txh_idx(TX_8X16); - const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx]; - const int 
cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx]; - const int width = 8; - const int height = 16; - const transform_1d_sse2 col_txfm = col_txfm8x16_arr[tx_type]; - const transform_1d_sse2 row_txfm = row_txfm8x8_arr[tx_type]; - int ud_flip, lr_flip; - - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - if (ud_flip) { - load_buffer_16bit_to_16bit_flip(input, stride, buf0, height); - } else { - load_buffer_16bit_to_16bit(input, stride, buf0, height); - } - round_shift_16bit(buf0, height, shift[0]); - col_txfm(buf0, buf0, cos_bit_col); - round_shift_16bit(buf0, height, shift[1]); - transpose_16bit_8x8(buf0, buf1); - transpose_16bit_8x8(buf0 + 8, buf1 + 8); - - for (int i = 0; i < 2; i++) { - __m128i *buf; - if (lr_flip) { - buf = buf0; - flip_buf_sse2(buf1 + width * i, buf, width); - } else { - buf = buf1 + width * i; - } - row_txfm(buf, buf, cos_bit_row); - round_shift_16bit(buf, width, shift[2]); - transpose_16bit_8x8(buf, buf); - store_rect_buffer_16bit_to_32bit_w8(buf, output + 8 * width * i, width, 8); - } -} - -void av1_lowbd_fwd_txfm2d_8x32_sse2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd) { - (void)bd; - __m128i buf0[32], buf1[32]; - const int8_t *shift = fwd_txfm_shift_ls[TX_8X32]; - const int txw_idx = get_txw_idx(TX_8X32); - const int txh_idx = get_txh_idx(TX_8X32); - const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx]; - const int width = 8; - const int height = 32; - const transform_1d_sse2 col_txfm = col_txfm8x32_arr[tx_type]; - const transform_1d_sse2 row_txfm = row_txfm8x8_arr[tx_type]; - int ud_flip, lr_flip; - - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - if (ud_flip) { - load_buffer_16bit_to_16bit_flip(input, stride, buf0, height); - } else { - load_buffer_16bit_to_16bit(input, stride, buf0, height); - } - round_shift_16bit(buf0, height, shift[0]); - col_txfm(buf0, buf0, cos_bit_col); - round_shift_16bit(buf0, height, shift[1]); - transpose_16bit_8x8(buf0, 
buf1); - transpose_16bit_8x8(buf0 + 8, buf1 + 8); - transpose_16bit_8x8(buf0 + 16, buf1 + 16); - transpose_16bit_8x8(buf0 + 24, buf1 + 24); - - for (int i = 0; i < 4; i++) { - __m128i *buf; - if (lr_flip) { - buf = buf0; - flip_buf_sse2(buf1 + width * i, buf, width); - } else { - buf = buf1 + width * i; - } - row_txfm(buf, buf, cos_bit_row); - round_shift_16bit(buf, width, shift[2]); - transpose_16bit_8x8(buf, buf); - store_buffer_16bit_to_32bit_w8(buf, output + 8 * width * i, width, 8); - } -} - -void av1_lowbd_fwd_txfm2d_16x4_sse2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd) { - (void)bd; - __m128i buf0[16], buf1[16]; - const int8_t *shift = fwd_txfm_shift_ls[TX_16X4]; - const int txw_idx = get_txw_idx(TX_16X4); - const int txh_idx = get_txh_idx(TX_16X4); - const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx]; - const int width = 16; - const int height = 4; - const transform_1d_sse2 col_txfm = col_txfm8x4_arr[tx_type]; - const transform_1d_sse2 row_txfm = row_txfm8x16_arr[tx_type]; - __m128i *buf; - int ud_flip, lr_flip; - - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - for (int i = 0; i < 2; i++) { - if (ud_flip) { - load_buffer_16bit_to_16bit_flip(input + 8 * i, stride, buf0, height); - } else { - load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height); - } - round_shift_16bit(buf0, height, shift[0]); - col_txfm(buf0, buf0, cos_bit_col); - round_shift_16bit(buf0, height, shift[1]); - transpose_16bit_8x4(buf0, buf1 + 8 * i); - } - - if (lr_flip) { - buf = buf0; - flip_buf_sse2(buf1, buf, width); - } else { - buf = buf1; - } - row_txfm(buf, buf, cos_bit_row); - round_shift_16bit(buf, width, shift[2]); - transpose_16bit_4x8(buf, buf); - store_buffer_16bit_to_32bit_w8(buf, output, width, height); - transpose_16bit_4x8(buf + 8, buf + 8); - store_buffer_16bit_to_32bit_w8(buf + 8, output + 8, width, height); -} - -void av1_lowbd_fwd_txfm2d_16x8_sse2(const int16_t 
*input, int32_t *output, - int stride, TX_TYPE tx_type, int bd) { - (void)bd; - __m128i buf0[16], buf1[16]; - const int8_t *shift = fwd_txfm_shift_ls[TX_16X8]; - const int txw_idx = get_txw_idx(TX_16X8); - const int txh_idx = get_txh_idx(TX_16X8); - const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx]; - const int width = 16; - const int height = 8; - const transform_1d_sse2 col_txfm = col_txfm8x8_arr[tx_type]; - const transform_1d_sse2 row_txfm = row_txfm8x16_arr[tx_type]; - __m128i *buf; - int ud_flip, lr_flip; - - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - for (int i = 0; i < 2; i++) { - if (ud_flip) { - load_buffer_16bit_to_16bit_flip(input + 8 * i, stride, buf0, height); - } else { - load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height); - } - round_shift_16bit(buf0, height, shift[0]); - col_txfm(buf0, buf0, cos_bit_col); - round_shift_16bit(buf0, height, shift[1]); - transpose_16bit_8x8(buf0, buf1 + 8 * i); - } - - if (lr_flip) { - buf = buf0; - flip_buf_sse2(buf1, buf, width); - } else { - buf = buf1; - } - row_txfm(buf, buf, cos_bit_row); - round_shift_16bit(buf, width, shift[2]); - transpose_16bit_8x8(buf, buf); - store_rect_buffer_16bit_to_32bit_w8(buf, output, width, height); - transpose_16bit_8x8(buf + 8, buf + 8); - store_rect_buffer_16bit_to_32bit_w8(buf + 8, output + 8, width, height); -} - -void av1_lowbd_fwd_txfm2d_16x16_sse2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd) { - (void)bd; - __m128i buf0[16], buf1[32]; - const int8_t *shift = fwd_txfm_shift_ls[TX_16X16]; - const int txw_idx = get_txw_idx(TX_16X16); - const int txh_idx = get_txh_idx(TX_16X16); - const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx]; - const int width = 16; - const int height = 16; - const transform_1d_sse2 col_txfm = col_txfm8x16_arr[tx_type]; - const transform_1d_sse2 row_txfm = 
row_txfm8x16_arr[tx_type]; - int ud_flip, lr_flip; - - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - for (int i = 0; i < 2; i++) { - if (ud_flip) { - load_buffer_16bit_to_16bit_flip(input + 8 * i, stride, buf0, height); - } else { - load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height); - } - round_shift_16bit(buf0, height, shift[0]); - col_txfm(buf0, buf0, cos_bit_col); - round_shift_16bit(buf0, height, shift[1]); - transpose_16bit_8x8(buf0, buf1 + 0 * width + 8 * i); - transpose_16bit_8x8(buf0 + 8, buf1 + 1 * width + 8 * i); - } - - for (int i = 0; i < 2; i++) { - __m128i *buf; - if (lr_flip) { - buf = buf0; - flip_buf_sse2(buf1 + width * i, buf, width); - } else { - buf = buf1 + width * i; - } - row_txfm(buf, buf, cos_bit_row); - round_shift_16bit(buf, width, shift[2]); - transpose_16bit_8x8(buf, buf); - store_buffer_16bit_to_32bit_w8(buf, output + 8 * width * i, width, 8); - transpose_16bit_8x8(buf + 8, buf + 8); - store_buffer_16bit_to_32bit_w8(buf + 8, output + 8 * width * i + 8, width, - 8); - } -} - -void av1_lowbd_fwd_txfm2d_16x32_sse2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd) { - (void)bd; - __m128i buf0[32], buf1[64]; - const int8_t *shift = fwd_txfm_shift_ls[TX_16X32]; - const int txw_idx = get_txw_idx(TX_16X32); - const int txh_idx = get_txh_idx(TX_16X32); - const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx]; - const int width = 16; - const int height = 32; - const transform_1d_sse2 col_txfm = col_txfm8x32_arr[tx_type]; - const transform_1d_sse2 row_txfm = row_txfm8x16_arr[tx_type]; - - if (col_txfm != NULL && row_txfm != NULL) { - int ud_flip, lr_flip; - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - - for (int i = 0; i < 2; i++) { - if (ud_flip) { - load_buffer_16bit_to_16bit_flip(input + 8 * i, stride, buf0, height); - } else { - load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height); - } - round_shift_16bit(buf0, height, 
shift[0]); - col_txfm(buf0, buf0, cos_bit_col); - round_shift_16bit(buf0, height, shift[1]); - transpose_16bit_8x8(buf0 + 0 * 8, buf1 + 0 * width + 8 * i); - transpose_16bit_8x8(buf0 + 1 * 8, buf1 + 1 * width + 8 * i); - transpose_16bit_8x8(buf0 + 2 * 8, buf1 + 2 * width + 8 * i); - transpose_16bit_8x8(buf0 + 3 * 8, buf1 + 3 * width + 8 * i); - } - - for (int i = 0; i < 4; i++) { - __m128i *buf; - if (lr_flip) { - buf = buf0; - flip_buf_sse2(buf1 + width * i, buf, width); - } else { - buf = buf1 + width * i; - } - row_txfm(buf, buf, cos_bit_row); - round_shift_16bit(buf, width, shift[2]); - transpose_16bit_8x8(buf, buf); - store_rect_buffer_16bit_to_32bit_w8(buf, output + 8 * width * i, width, - 8); - transpose_16bit_8x8(buf + 8, buf + 8); - store_rect_buffer_16bit_to_32bit_w8(buf + 8, output + 8 * width * i + 8, - width, 8); - } - } else { - av1_fwd_txfm2d_16x32_c(input, output, stride, tx_type, bd); - } -} - -void av1_lowbd_fwd_txfm2d_32x8_sse2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd) { - (void)bd; - __m128i buf0[32], buf1[32]; - const int8_t *shift = fwd_txfm_shift_ls[TX_32X8]; - const int txw_idx = get_txw_idx(TX_32X8); - const int txh_idx = get_txh_idx(TX_32X8); - const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx]; - const int width = 32; - const int height = 8; - const transform_1d_sse2 col_txfm = col_txfm8x8_arr[tx_type]; - const transform_1d_sse2 row_txfm = row_txfm8x32_arr[tx_type]; - - if (col_txfm != NULL && row_txfm != NULL) { - int ud_flip, lr_flip; - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - - for (int i = 0; i < 4; i++) { - if (ud_flip) { - load_buffer_16bit_to_16bit_flip(input + 8 * i, stride, buf0, height); - } else { - load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height); - } - round_shift_16bit(buf0, height, shift[0]); - col_txfm(buf0, buf0, cos_bit_col); - round_shift_16bit(buf0, height, shift[1]); - transpose_16bit_8x8(buf0, 
buf1 + 0 * width + 8 * i); - } - - for (int i = 0; i < 1; i++) { - __m128i *buf; - if (lr_flip) { - buf = buf0; - flip_buf_sse2(buf1 + width * i, buf, width); - } else { - buf = buf1 + width * i; - } - row_txfm(buf, buf, cos_bit_row); - round_shift_16bit(buf, width, shift[2]); - transpose_16bit_8x8(buf, buf); - store_buffer_16bit_to_32bit_w8(buf, output + 8 * width * i, width, - height); - transpose_16bit_8x8(buf + 8, buf + 8); - store_buffer_16bit_to_32bit_w8(buf + 8, output + 8 * width * i + 8, width, - height); - transpose_16bit_8x8(buf + 16, buf + 16); - store_buffer_16bit_to_32bit_w8(buf + 16, output + 8 * width * i + 16, - width, height); - transpose_16bit_8x8(buf + 24, buf + 24); - store_buffer_16bit_to_32bit_w8(buf + 24, output + 8 * width * i + 24, - width, height); - } - } else { - av1_fwd_txfm2d_32x16_c(input, output, stride, tx_type, bd); - } -} - -void av1_lowbd_fwd_txfm2d_32x16_sse2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd) { - (void)bd; - __m128i buf0[32], buf1[64]; - const int8_t *shift = fwd_txfm_shift_ls[TX_32X16]; - const int txw_idx = get_txw_idx(TX_32X16); - const int txh_idx = get_txh_idx(TX_32X16); - const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx]; - const int width = 32; - const int height = 16; - const transform_1d_sse2 col_txfm = col_txfm8x16_arr[tx_type]; - const transform_1d_sse2 row_txfm = row_txfm8x32_arr[tx_type]; - - if (col_txfm != NULL && row_txfm != NULL) { - int ud_flip, lr_flip; - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - - for (int i = 0; i < 4; i++) { - if (ud_flip) { - load_buffer_16bit_to_16bit_flip(input + 8 * i, stride, buf0, height); - } else { - load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height); - } - round_shift_16bit(buf0, height, shift[0]); - col_txfm(buf0, buf0, cos_bit_col); - round_shift_16bit(buf0, height, shift[1]); - transpose_16bit_8x8(buf0, buf1 + 0 * width + 8 * i); - 
transpose_16bit_8x8(buf0 + 8, buf1 + 1 * width + 8 * i); - } - - for (int i = 0; i < 2; i++) { - __m128i *buf; - if (lr_flip) { - buf = buf0; - flip_buf_sse2(buf1 + width * i, buf, width); - } else { - buf = buf1 + width * i; - } - row_txfm(buf, buf, cos_bit_row); - round_shift_16bit(buf, width, shift[2]); - transpose_16bit_8x8(buf, buf); - store_rect_buffer_16bit_to_32bit_w8(buf, output + 8 * width * i, width, - 8); - transpose_16bit_8x8(buf + 8, buf + 8); - store_rect_buffer_16bit_to_32bit_w8(buf + 8, output + 8 * width * i + 8, - width, 8); - transpose_16bit_8x8(buf + 16, buf + 16); - store_rect_buffer_16bit_to_32bit_w8(buf + 16, output + 8 * width * i + 16, - width, 8); - transpose_16bit_8x8(buf + 24, buf + 24); - store_rect_buffer_16bit_to_32bit_w8(buf + 24, output + 8 * width * i + 24, - width, 8); - } - } else { - av1_fwd_txfm2d_32x16_c(input, output, stride, tx_type, bd); - } -} - -void av1_lowbd_fwd_txfm2d_32x32_sse2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd) { - (void)bd; - __m128i buf0[32], buf1[128]; - const int8_t *shift = fwd_txfm_shift_ls[TX_32X32]; - const int txw_idx = get_txw_idx(TX_32X32); - const int txh_idx = get_txh_idx(TX_32X32); - const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx]; - const int width = 32; - const int height = 32; - const transform_1d_sse2 col_txfm = col_txfm8x32_arr[tx_type]; - const transform_1d_sse2 row_txfm = row_txfm8x32_arr[tx_type]; - - if (col_txfm != NULL && row_txfm != NULL) { - int ud_flip, lr_flip; - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - - for (int i = 0; i < 4; i++) { - if (ud_flip) { - load_buffer_16bit_to_16bit_flip(input + 8 * i, stride, buf0, height); - } else { - load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height); - } - round_shift_16bit(buf0, height, shift[0]); - col_txfm(buf0, buf0, cos_bit_col); - round_shift_16bit(buf0, height, shift[1]); - transpose_16bit_8x8(buf0 + 0 * 8, buf1 + 0 
* width + 8 * i); - transpose_16bit_8x8(buf0 + 1 * 8, buf1 + 1 * width + 8 * i); - transpose_16bit_8x8(buf0 + 2 * 8, buf1 + 2 * width + 8 * i); - transpose_16bit_8x8(buf0 + 3 * 8, buf1 + 3 * width + 8 * i); - } - - for (int i = 0; i < 4; i++) { - __m128i *buf; - if (lr_flip) { - buf = buf0; - flip_buf_sse2(buf1 + width * i, buf, width); - } else { - buf = buf1 + width * i; - } - row_txfm(buf, buf, cos_bit_row); - round_shift_16bit(buf, width, shift[2]); - transpose_16bit_8x8(buf, buf); - store_buffer_16bit_to_32bit_w8(buf, output + 8 * width * i, width, 8); - transpose_16bit_8x8(buf + 8, buf + 8); - store_buffer_16bit_to_32bit_w8(buf + 8, output + 8 * width * i + 8, width, - 8); - transpose_16bit_8x8(buf + 16, buf + 16); - store_buffer_16bit_to_32bit_w8(buf + 16, output + 8 * width * i + 16, - width, 8); - transpose_16bit_8x8(buf + 24, buf + 24); - store_buffer_16bit_to_32bit_w8(buf + 24, output + 8 * width * i + 24, - width, 8); - } - } else { - av1_fwd_txfm2d_32x32_c(input, output, stride, tx_type, bd); - } -} - -void av1_lowbd_fwd_txfm2d_64x16_sse2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd) { - (void)bd; - (void)tx_type; - assert(tx_type == DCT_DCT); - const TX_SIZE tx_size = TX_64X16; - __m128i buf0[64], buf1[128]; - const int8_t *shift = fwd_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx]; - const int width = tx_size_wide[tx_size]; - const int height = tx_size_high[tx_size]; - const transform_1d_sse2 col_txfm = fdct8x16_new_sse2; - const transform_1d_sse2 row_txfm = fdct8x64_new_sse2; - const int width_div8 = (width >> 3); - const int height_div8 = (height >> 3); - - for (int i = 0; i < width_div8; i++) { - load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height); - round_shift_16bit(buf0, height, shift[0]); - col_txfm(buf0, buf0, 
cos_bit_col); - round_shift_16bit(buf0, height, shift[1]); - for (int j = 0; j < height_div8; ++j) { - transpose_16bit_8x8(buf0 + j * 8, buf1 + j * width + 8 * i); - } - } - - for (int i = 0; i < height_div8; i++) { - __m128i *buf = buf1 + width * i; - row_txfm(buf, buf, cos_bit_row); - round_shift_16bit(buf, width, shift[2]); - int32_t *output8 = output + 8 * 32 * i; - for (int j = 0; j < 4; ++j) { - __m128i *buf8 = buf + 8 * j; - transpose_16bit_8x8(buf8, buf8); - store_buffer_16bit_to_32bit_w8(buf8, output8 + 8 * j, 32, 8); - } - } -} - -void av1_lowbd_fwd_txfm2d_16x64_sse2(const int16_t *input, int32_t *output, - int stride, TX_TYPE tx_type, int bd) { - (void)bd; - (void)tx_type; - assert(tx_type == DCT_DCT); - const TX_SIZE tx_size = TX_16X64; - __m128i buf0[64], buf1[128]; - const int8_t *shift = fwd_txfm_shift_ls[tx_size]; - const int txw_idx = get_txw_idx(tx_size); - const int txh_idx = get_txh_idx(tx_size); - const int cos_bit_col = fwd_cos_bit_col[txw_idx][txh_idx]; - const int cos_bit_row = fwd_cos_bit_row[txw_idx][txh_idx]; - const int width = tx_size_wide[tx_size]; - const int height = tx_size_high[tx_size]; - const transform_1d_sse2 col_txfm = fdct8x64_new_sse2; - const transform_1d_sse2 row_txfm = fdct8x16_new_sse2; - const int width_div8 = (width >> 3); - const int height_div8 = (height >> 3); - - for (int i = 0; i < width_div8; i++) { - load_buffer_16bit_to_16bit(input + 8 * i, stride, buf0, height); - round_shift_16bit(buf0, height, shift[0]); - col_txfm(buf0, buf0, cos_bit_col); - round_shift_16bit(buf0, height, shift[1]); - for (int j = 0; j < height_div8; ++j) { - transpose_16bit_8x8(buf0 + j * 8, buf1 + j * width + 8 * i); - } - } - - for (int i = 0; i < AOMMIN(4, height_div8); i++) { - __m128i *buf = buf1 + width * i; - row_txfm(buf, buf, cos_bit_row); - round_shift_16bit(buf, width, shift[2]); - int32_t *output8 = output + 8 * width * i; - for (int j = 0; j < width_div8; ++j) { - __m128i *buf8 = buf + 8 * j; - transpose_16bit_8x8(buf8, 
buf8); - store_buffer_16bit_to_32bit_w8(buf8, output8 + 8 * j, width, 8); - } - } - // Zero out the bottom 16x32 area. - memset(output + 16 * 32, 0, 16 * 32 * sizeof(*output)); -} - -static FwdTxfm2dFunc fwd_txfm2d_func_ls[TX_SIZES_ALL] = { - av1_lowbd_fwd_txfm2d_4x4_sse2, // 4x4 transform - av1_lowbd_fwd_txfm2d_8x8_sse2, // 8x8 transform - av1_lowbd_fwd_txfm2d_16x16_sse2, // 16x16 transform - av1_lowbd_fwd_txfm2d_32x32_sse2, // 32x32 transform - NULL, // 64x64 transform - av1_lowbd_fwd_txfm2d_4x8_sse2, // 4x8 transform - av1_lowbd_fwd_txfm2d_8x4_sse2, // 8x4 transform - av1_lowbd_fwd_txfm2d_8x16_sse2, // 8x16 transform - av1_lowbd_fwd_txfm2d_16x8_sse2, // 16x8 transform - av1_lowbd_fwd_txfm2d_16x32_sse2, // 16x32 transform - av1_lowbd_fwd_txfm2d_32x16_sse2, // 32x16 transform - NULL, // 32x64 transform - NULL, // 64x32 transform - av1_lowbd_fwd_txfm2d_4x16_sse2, // 4x16 transform - av1_lowbd_fwd_txfm2d_16x4_sse2, // 16x4 transform - av1_lowbd_fwd_txfm2d_8x32_sse2, // 8x32 transform - av1_lowbd_fwd_txfm2d_32x8_sse2, // 32x8 transform - av1_lowbd_fwd_txfm2d_16x64_sse2, // 16x64 transform - av1_lowbd_fwd_txfm2d_64x16_sse2, // 64x16 transform -}; - -void av1_lowbd_fwd_txfm_sse2(const int16_t *src_diff, tran_low_t *coeff, - int diff_stride, TxfmParam *txfm_param) { - FwdTxfm2dFunc fwd_txfm2d_func = fwd_txfm2d_func_ls[txfm_param->tx_size]; - - if ((fwd_txfm2d_func == NULL) || - (txfm_param->lossless && txfm_param->tx_size == TX_4X4)) - av1_lowbd_fwd_txfm_c(src_diff, coeff, diff_stride, txfm_param); - else - fwd_txfm2d_func(src_diff, coeff, diff_stride, txfm_param->tx_type, - txfm_param->bd); -} diff --git a/third_party/aom/av1/encoder/x86/av1_fwd_txfm_sse2.h b/third_party/aom/av1/encoder/x86/av1_fwd_txfm_sse2.h deleted file mode 100644 index 99a6b9082..000000000 --- a/third_party/aom/av1/encoder/x86/av1_fwd_txfm_sse2.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. 
All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ -#ifndef AOM_AV1_ENCODER_X86_AV1_FWD_TXFM_SSE2_H_ -#define AOM_AV1_ENCODER_X86_AV1_FWD_TXFM_SSE2_H_ - -#include - -#include "config/aom_config.h" -#include "config/av1_rtcd.h" - -#include "aom/aom_integer.h" -#include "aom_dsp/x86/transpose_sse2.h" -#include "aom_dsp/x86/txfm_common_sse2.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void fdct8x32_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit); -void fdct8x64_new_sse2(const __m128i *input, __m128i *output, int8_t cos_bit); - -static INLINE void fidentity4x4_new_sse2(const __m128i *const input, - __m128i *const output, - const int8_t cos_bit) { - (void)cos_bit; - const __m128i one = _mm_set1_epi16(1); - - for (int i = 0; i < 4; ++i) { - const __m128i a = _mm_unpacklo_epi16(input[i], one); - const __m128i b = scale_round_sse2(a, NewSqrt2); - output[i] = _mm_packs_epi32(b, b); - } -} - -static INLINE void fidentity8x4_new_sse2(const __m128i *const input, - __m128i *const output, - const int8_t cos_bit) { - (void)cos_bit; - const __m128i one = _mm_set1_epi16(1); - - for (int i = 0; i < 4; ++i) { - const __m128i a_lo = _mm_unpacklo_epi16(input[i], one); - const __m128i a_hi = _mm_unpackhi_epi16(input[i], one); - const __m128i b_lo = scale_round_sse2(a_lo, NewSqrt2); - const __m128i b_hi = scale_round_sse2(a_hi, NewSqrt2); - output[i] = _mm_packs_epi32(b_lo, b_hi); - } -} - -static INLINE void fidentity8x8_new_sse2(const __m128i *input, __m128i *output, - int8_t cos_bit) { - (void)cos_bit; - - output[0] = 
_mm_adds_epi16(input[0], input[0]); - output[1] = _mm_adds_epi16(input[1], input[1]); - output[2] = _mm_adds_epi16(input[2], input[2]); - output[3] = _mm_adds_epi16(input[3], input[3]); - output[4] = _mm_adds_epi16(input[4], input[4]); - output[5] = _mm_adds_epi16(input[5], input[5]); - output[6] = _mm_adds_epi16(input[6], input[6]); - output[7] = _mm_adds_epi16(input[7], input[7]); -} - -static INLINE void fidentity8x16_new_sse2(const __m128i *input, __m128i *output, - int8_t cos_bit) { - (void)cos_bit; - const __m128i one = _mm_set1_epi16(1); - - for (int i = 0; i < 16; ++i) { - const __m128i a_lo = _mm_unpacklo_epi16(input[i], one); - const __m128i a_hi = _mm_unpackhi_epi16(input[i], one); - const __m128i b_lo = scale_round_sse2(a_lo, 2 * NewSqrt2); - const __m128i b_hi = scale_round_sse2(a_hi, 2 * NewSqrt2); - output[i] = _mm_packs_epi32(b_lo, b_hi); - } -} - -static INLINE void fidentity8x32_new_sse2(const __m128i *input, __m128i *output, - int8_t cos_bit) { - (void)cos_bit; - for (int i = 0; i < 32; ++i) { - output[i] = _mm_slli_epi16(input[i], 2); - } -} - -static const transform_1d_sse2 col_txfm8x32_arr[TX_TYPES] = { - fdct8x32_new_sse2, // DCT_DCT - NULL, // ADST_DCT - NULL, // DCT_ADST - NULL, // ADST_ADST - NULL, // FLIPADST_DCT - NULL, // DCT_FLIPADST - NULL, // FLIPADST_FLIPADST - NULL, // ADST_FLIPADST - NULL, // FLIPADST_ADST - fidentity8x32_new_sse2, // IDTX - fdct8x32_new_sse2, // V_DCT - fidentity8x32_new_sse2, // H_DCT - NULL, // V_ADST - NULL, // H_ADST - NULL, // V_FLIPADST - NULL // H_FLIPADST -}; - -#ifdef __cplusplus -} -#endif - -#endif // AOM_AV1_ENCODER_X86_AV1_FWD_TXFM_SSE2_H_ diff --git a/third_party/aom/av1/encoder/x86/av1_highbd_quantize_avx2.c b/third_party/aom/av1/encoder/x86/av1_highbd_quantize_avx2.c deleted file mode 100644 index b58911fcb..000000000 --- a/third_party/aom/av1/encoder/x86/av1_highbd_quantize_avx2.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2017, Alliance for Open Media. 
All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include - -#include "config/av1_rtcd.h" - -#include "aom/aom_integer.h" -#include "aom_dsp/aom_dsp_common.h" - -static INLINE void init_one_qp(const __m128i *p, __m256i *qp) { - const __m128i zero = _mm_setzero_si128(); - const __m128i dc = _mm_unpacklo_epi16(*p, zero); - const __m128i ac = _mm_unpackhi_epi16(*p, zero); - *qp = _mm256_insertf128_si256(_mm256_castsi128_si256(dc), ac, 1); -} - -static INLINE void update_qp(__m256i *qp) { - qp[0] = _mm256_permute2x128_si256(qp[0], qp[0], 0x11); - qp[1] = _mm256_permute2x128_si256(qp[1], qp[1], 0x11); - qp[2] = _mm256_permute2x128_si256(qp[2], qp[2], 0x11); -} - -static INLINE void init_qp(const int16_t *round_ptr, const int16_t *quant_ptr, - const int16_t *dequant_ptr, int log_scale, - __m256i *qp) { - __m128i round = _mm_loadu_si128((const __m128i *)round_ptr); - if (log_scale) { - const __m128i round_scale = _mm_set1_epi16(1 << (15 - log_scale)); - round = _mm_mulhrs_epi16(round, round_scale); - } - const __m128i quant = _mm_loadu_si128((const __m128i *)quant_ptr); - const __m128i dequant = _mm_loadu_si128((const __m128i *)dequant_ptr); - - init_one_qp(&round, &qp[0]); - init_one_qp(&quant, &qp[1]); - init_one_qp(&dequant, &qp[2]); -} - -static INLINE void quantize(const __m256i *qp, __m256i *c, - const int16_t *iscan_ptr, int log_scale, - tran_low_t *qcoeff, tran_low_t *dqcoeff, - __m256i *eob) { - const __m256i abs_coeff = _mm256_abs_epi32(*c); - __m256i q = _mm256_add_epi32(abs_coeff, qp[0]); - - __m256i q_lo = _mm256_mul_epi32(q, 
qp[1]); - __m256i q_hi = _mm256_srli_epi64(q, 32); - const __m256i qp_hi = _mm256_srli_epi64(qp[1], 32); - q_hi = _mm256_mul_epi32(q_hi, qp_hi); - q_lo = _mm256_srli_epi64(q_lo, 16 - log_scale); - q_hi = _mm256_srli_epi64(q_hi, 16 - log_scale); - q_hi = _mm256_slli_epi64(q_hi, 32); - q = _mm256_or_si256(q_lo, q_hi); - const __m256i abs_s = _mm256_slli_epi32(abs_coeff, 1 + log_scale); - const __m256i mask = _mm256_cmpgt_epi32(qp[2], abs_s); - q = _mm256_andnot_si256(mask, q); - - __m256i dq = _mm256_mullo_epi32(q, qp[2]); - dq = _mm256_srai_epi32(dq, log_scale); - q = _mm256_sign_epi32(q, *c); - dq = _mm256_sign_epi32(dq, *c); - - _mm256_storeu_si256((__m256i *)qcoeff, q); - _mm256_storeu_si256((__m256i *)dqcoeff, dq); - - const __m128i isc = _mm_loadu_si128((const __m128i *)iscan_ptr); - const __m128i zr = _mm_setzero_si128(); - const __m128i lo = _mm_unpacklo_epi16(isc, zr); - const __m128i hi = _mm_unpackhi_epi16(isc, zr); - const __m256i iscan = - _mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1); - - const __m256i zero = _mm256_setzero_si256(); - const __m256i zc = _mm256_cmpeq_epi32(dq, zero); - const __m256i nz = _mm256_cmpeq_epi32(zc, zero); - __m256i cur_eob = _mm256_sub_epi32(iscan, nz); - cur_eob = _mm256_and_si256(cur_eob, nz); - *eob = _mm256_max_epi32(cur_eob, *eob); -} - -void av1_highbd_quantize_fp_avx2( - const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, - const int16_t *round_ptr, const int16_t *quant_ptr, - const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan, const int16_t *iscan, int log_scale) { - (void)scan; - (void)zbin_ptr; - (void)quant_shift_ptr; - const unsigned int step = 8; - __m256i qp[3], coeff; - - init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, qp); - coeff = _mm256_loadu_si256((const __m256i *)coeff_ptr); - - __m256i eob = _mm256_setzero_si256(); - quantize(qp, &coeff, iscan, log_scale, 
qcoeff_ptr, dqcoeff_ptr, &eob); - - coeff_ptr += step; - qcoeff_ptr += step; - dqcoeff_ptr += step; - iscan += step; - n_coeffs -= step; - - update_qp(qp); - while (n_coeffs > 0) { - coeff = _mm256_loadu_si256((const __m256i *)coeff_ptr); - quantize(qp, &coeff, iscan, log_scale, qcoeff_ptr, dqcoeff_ptr, &eob); - - coeff_ptr += step; - qcoeff_ptr += step; - dqcoeff_ptr += step; - iscan += step; - n_coeffs -= step; - } - { - __m256i eob_s; - eob_s = _mm256_shuffle_epi32(eob, 0xe); - eob = _mm256_max_epi16(eob, eob_s); - eob_s = _mm256_shufflelo_epi16(eob, 0xe); - eob = _mm256_max_epi16(eob, eob_s); - eob_s = _mm256_shufflelo_epi16(eob, 1); - eob = _mm256_max_epi16(eob, eob_s); - const __m128i final_eob = _mm_max_epi16(_mm256_castsi256_si128(eob), - _mm256_extractf128_si256(eob, 1)); - *eob_ptr = _mm_extract_epi16(final_eob, 0); - } -} diff --git a/third_party/aom/av1/encoder/x86/av1_highbd_quantize_sse4.c b/third_party/aom/av1/encoder/x86/av1_highbd_quantize_sse4.c deleted file mode 100644 index 40b3b460b..000000000 --- a/third_party/aom/av1/encoder/x86/av1_highbd_quantize_sse4.c +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include -#include - -#include "config/av1_rtcd.h" - -#include "aom_dsp/aom_dsp_common.h" -#include "aom_dsp/x86/synonyms.h" - -// Coefficient quantization phase 1 -// param[0-2] : rounding/quan/dequan constants -static INLINE void quantize_coeff_phase1(__m128i *coeff, const __m128i *param, - const int shift, const int scale, - __m128i *qcoeff, __m128i *dquan, - __m128i *sign) { - const __m128i zero = _mm_setzero_si128(); - const __m128i one = _mm_set1_epi32(1); - - *sign = _mm_cmplt_epi32(*coeff, zero); - *sign = _mm_or_si128(*sign, one); - *coeff = _mm_abs_epi32(*coeff); - - qcoeff[0] = _mm_add_epi32(*coeff, param[0]); - qcoeff[1] = _mm_unpackhi_epi32(qcoeff[0], zero); - qcoeff[0] = _mm_unpacklo_epi32(qcoeff[0], zero); - - qcoeff[0] = _mm_mul_epi32(qcoeff[0], param[1]); - qcoeff[0] = _mm_srli_epi64(qcoeff[0], shift); - dquan[0] = _mm_mul_epi32(qcoeff[0], param[2]); - dquan[0] = _mm_srli_epi64(dquan[0], scale); - const __m128i abs_s = _mm_slli_epi32(*coeff, 1 + scale); - qcoeff[2] = _mm_cmplt_epi32(abs_s, param[3]); -} - -// Coefficient quantization phase 2 -static INLINE void quantize_coeff_phase2(__m128i *qcoeff, __m128i *dquan, - const __m128i *sign, - const __m128i *param, const int shift, - const int scale, tran_low_t *qAddr, - tran_low_t *dqAddr) { - __m128i mask0L = _mm_set_epi32(-1, -1, 0, 0); - __m128i mask0H = _mm_set_epi32(0, 0, -1, -1); - - qcoeff[1] = _mm_mul_epi32(qcoeff[1], param[1]); - qcoeff[1] = _mm_srli_epi64(qcoeff[1], shift); - dquan[1] = _mm_mul_epi32(qcoeff[1], param[2]); - dquan[1] = _mm_srli_epi64(dquan[1], scale); - - // combine L&H - qcoeff[0] = _mm_shuffle_epi32(qcoeff[0], 0xd8); - qcoeff[1] = _mm_shuffle_epi32(qcoeff[1], 0x8d); - - qcoeff[0] = _mm_and_si128(qcoeff[0], mask0H); - qcoeff[1] = _mm_and_si128(qcoeff[1], mask0L); - - dquan[0] = _mm_shuffle_epi32(dquan[0], 0xd8); - dquan[1] = _mm_shuffle_epi32(dquan[1], 0x8d); - - dquan[0] = _mm_and_si128(dquan[0], mask0H); - dquan[1] = _mm_and_si128(dquan[1], mask0L); - - qcoeff[0] = 
_mm_or_si128(qcoeff[0], qcoeff[1]); - dquan[0] = _mm_or_si128(dquan[0], dquan[1]); - - qcoeff[0] = _mm_sign_epi32(qcoeff[0], *sign); - dquan[0] = _mm_sign_epi32(dquan[0], *sign); - qcoeff[0] = _mm_andnot_si128(qcoeff[2], qcoeff[0]); - dquan[0] = _mm_andnot_si128(qcoeff[2], dquan[0]); - _mm_storeu_si128((__m128i *)qAddr, qcoeff[0]); - _mm_storeu_si128((__m128i *)dqAddr, dquan[0]); -} - -static INLINE void find_eob(tran_low_t *qcoeff_ptr, const int16_t *iscan, - __m128i *eob) { - const __m128i zero = _mm_setzero_si128(); - __m128i mask, iscanIdx; - const __m128i q0 = _mm_loadu_si128((__m128i const *)qcoeff_ptr); - const __m128i q1 = _mm_loadu_si128((__m128i const *)(qcoeff_ptr + 4)); - __m128i nz_flag0 = _mm_cmpeq_epi32(q0, zero); - __m128i nz_flag1 = _mm_cmpeq_epi32(q1, zero); - - nz_flag0 = _mm_cmpeq_epi32(nz_flag0, zero); - nz_flag1 = _mm_cmpeq_epi32(nz_flag1, zero); - - mask = _mm_packs_epi32(nz_flag0, nz_flag1); - iscanIdx = _mm_loadu_si128((__m128i const *)iscan); - iscanIdx = _mm_sub_epi16(iscanIdx, mask); - iscanIdx = _mm_and_si128(iscanIdx, mask); - *eob = _mm_max_epi16(*eob, iscanIdx); -} - -static INLINE uint16_t get_accumulated_eob(__m128i *eob) { - __m128i eob_shuffled; - uint16_t eobValue; - eob_shuffled = _mm_shuffle_epi32(*eob, 0xe); - *eob = _mm_max_epi16(*eob, eob_shuffled); - eob_shuffled = _mm_shufflelo_epi16(*eob, 0xe); - *eob = _mm_max_epi16(*eob, eob_shuffled); - eob_shuffled = _mm_shufflelo_epi16(*eob, 0x1); - *eob = _mm_max_epi16(*eob, eob_shuffled); - eobValue = _mm_extract_epi16(*eob, 0); - return eobValue; -} - -void av1_highbd_quantize_fp_sse4_1( - const tran_low_t *coeff_ptr, intptr_t count, const int16_t *zbin_ptr, - const int16_t *round_ptr, const int16_t *quant_ptr, - const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan, const int16_t *iscan, int log_scale) { - __m128i coeff[2], qcoeff[3], dequant[2], qparam[4], coeff_sign; - __m128i eob 
= _mm_setzero_si128(); - const tran_low_t *src = coeff_ptr; - tran_low_t *quanAddr = qcoeff_ptr; - tran_low_t *dquanAddr = dqcoeff_ptr; - const int shift = 16 - log_scale; - const int coeff_stride = 4; - const int quan_stride = coeff_stride; - (void)zbin_ptr; - (void)quant_shift_ptr; - (void)scan; - - memset(quanAddr, 0, count * sizeof(quanAddr[0])); - memset(dquanAddr, 0, count * sizeof(dquanAddr[0])); - - coeff[0] = _mm_loadu_si128((__m128i const *)src); - const int round1 = ROUND_POWER_OF_TWO(round_ptr[1], log_scale); - const int round0 = ROUND_POWER_OF_TWO(round_ptr[0], log_scale); - - qparam[0] = _mm_set_epi32(round1, round1, round1, round0); - qparam[1] = xx_set_64_from_32i(quant_ptr[1], quant_ptr[0]); - qparam[2] = xx_set_64_from_32i(dequant_ptr[1], dequant_ptr[0]); - qparam[3] = _mm_set_epi32(dequant_ptr[1], dequant_ptr[1], dequant_ptr[1], - dequant_ptr[0]); - - // DC and first 3 AC - quantize_coeff_phase1(&coeff[0], qparam, shift, log_scale, qcoeff, dequant, - &coeff_sign); - - // update round/quan/dquan for AC - qparam[0] = _mm_unpackhi_epi64(qparam[0], qparam[0]); - qparam[1] = xx_set1_64_from_32i(quant_ptr[1]); - qparam[2] = xx_set1_64_from_32i(dequant_ptr[1]); - qparam[3] = _mm_set1_epi32(dequant_ptr[1]); - quantize_coeff_phase2(qcoeff, dequant, &coeff_sign, qparam, shift, log_scale, - quanAddr, dquanAddr); - - // next 4 AC - coeff[1] = _mm_loadu_si128((__m128i const *)(src + coeff_stride)); - quantize_coeff_phase1(&coeff[1], qparam, shift, log_scale, qcoeff, dequant, - &coeff_sign); - quantize_coeff_phase2(qcoeff, dequant, &coeff_sign, qparam, shift, log_scale, - quanAddr + quan_stride, dquanAddr + quan_stride); - - find_eob(quanAddr, iscan, &eob); - - count -= 8; - - // loop for the rest of AC - while (count > 0) { - src += coeff_stride << 1; - quanAddr += quan_stride << 1; - dquanAddr += quan_stride << 1; - iscan += quan_stride << 1; - - coeff[0] = _mm_loadu_si128((__m128i const *)src); - coeff[1] = _mm_loadu_si128((__m128i const *)(src + 
coeff_stride)); - - quantize_coeff_phase1(&coeff[0], qparam, shift, log_scale, qcoeff, dequant, - &coeff_sign); - quantize_coeff_phase2(qcoeff, dequant, &coeff_sign, qparam, shift, - log_scale, quanAddr, dquanAddr); - - quantize_coeff_phase1(&coeff[1], qparam, shift, log_scale, qcoeff, dequant, - &coeff_sign); - quantize_coeff_phase2(qcoeff, dequant, &coeff_sign, qparam, shift, - log_scale, quanAddr + quan_stride, - dquanAddr + quan_stride); - - find_eob(quanAddr, iscan, &eob); - - count -= 8; - } - *eob_ptr = get_accumulated_eob(&eob); -} diff --git a/third_party/aom/av1/encoder/x86/av1_quantize_avx2.c b/third_party/aom/av1/encoder/x86/av1_quantize_avx2.c deleted file mode 100644 index df22aaba7..000000000 --- a/third_party/aom/av1/encoder/x86/av1_quantize_avx2.c +++ /dev/null @@ -1,330 +0,0 @@ -/* - * Copyright (c) 2017, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include - -#include "config/av1_rtcd.h" - -#include "aom/aom_integer.h" -#include "aom_dsp/aom_dsp_common.h" - -static INLINE void read_coeff(const tran_low_t *coeff, __m256i *c) { - if (sizeof(tran_low_t) == 4) { - const __m256i x0 = _mm256_loadu_si256((const __m256i *)coeff); - const __m256i x1 = _mm256_loadu_si256((const __m256i *)coeff + 1); - *c = _mm256_packs_epi32(x0, x1); - *c = _mm256_permute4x64_epi64(*c, 0xD8); - } else { - *c = _mm256_loadu_si256((const __m256i *)coeff); - } -} - -static INLINE void write_zero(tran_low_t *qcoeff) { - const __m256i zero = _mm256_setzero_si256(); - if (sizeof(tran_low_t) == 4) { - _mm256_storeu_si256((__m256i *)qcoeff, zero); - _mm256_storeu_si256((__m256i *)qcoeff + 1, zero); - } else { - _mm256_storeu_si256((__m256i *)qcoeff, zero); - } -} - -static INLINE void init_one_qp(const __m128i *p, __m256i *qp) { - const __m128i ac = _mm_unpackhi_epi64(*p, *p); - *qp = _mm256_insertf128_si256(_mm256_castsi128_si256(*p), ac, 1); -} - -static INLINE void init_qp(const int16_t *round_ptr, const int16_t *quant_ptr, - const int16_t *dequant_ptr, int log_scale, - __m256i *thr, __m256i *qp) { - __m128i round = _mm_loadu_si128((const __m128i *)round_ptr); - const __m128i quant = _mm_loadu_si128((const __m128i *)quant_ptr); - const __m128i dequant = _mm_loadu_si128((const __m128i *)dequant_ptr); - - if (log_scale > 0) { - const __m128i rnd = _mm_set1_epi16((int16_t)1 << (log_scale - 1)); - round = _mm_add_epi16(round, rnd); - round = _mm_srai_epi16(round, log_scale); - } - - init_one_qp(&round, &qp[0]); - init_one_qp(&quant, &qp[1]); - - if (log_scale == 1) { - qp[1] = _mm256_slli_epi16(qp[1], log_scale); - } - - init_one_qp(&dequant, &qp[2]); - *thr = _mm256_srai_epi16(qp[2], 1 + log_scale); -} - -static INLINE void update_qp(int log_scale, __m256i *thr, __m256i *qp) { - qp[0] = _mm256_permute2x128_si256(qp[0], qp[0], 0x11); - qp[1] = _mm256_permute2x128_si256(qp[1], qp[1], 0x11); - qp[2] = _mm256_permute2x128_si256(qp[2], 
qp[2], 0x11); - *thr = _mm256_srai_epi16(qp[2], 1 + log_scale); -} - -#define store_quan(q, addr) \ - do { \ - __m256i sign_bits = _mm256_srai_epi16(q, 15); \ - __m256i y0 = _mm256_unpacklo_epi16(q, sign_bits); \ - __m256i y1 = _mm256_unpackhi_epi16(q, sign_bits); \ - __m256i x0 = _mm256_permute2x128_si256(y0, y1, 0x20); \ - __m256i x1 = _mm256_permute2x128_si256(y0, y1, 0x31); \ - _mm256_storeu_si256((__m256i *)addr, x0); \ - _mm256_storeu_si256((__m256i *)addr + 1, x1); \ - } while (0) - -#define store_two_quan(q, addr1, dq, addr2) \ - do { \ - if (sizeof(tran_low_t) == 4) { \ - store_quan(q, addr1); \ - store_quan(dq, addr2); \ - } else { \ - _mm256_storeu_si256((__m256i *)addr1, q); \ - _mm256_storeu_si256((__m256i *)addr2, dq); \ - } \ - } while (0) - -static INLINE uint16_t quant_gather_eob(__m256i eob) { - const __m128i eob_lo = _mm256_castsi256_si128(eob); - const __m128i eob_hi = _mm256_extractf128_si256(eob, 1); - __m128i eob_s = _mm_max_epi16(eob_lo, eob_hi); - eob_s = _mm_subs_epu16(_mm_set1_epi16(INT16_MAX), eob_s); - eob_s = _mm_minpos_epu16(eob_s); - return INT16_MAX - _mm_extract_epi16(eob_s, 0); -} - -static INLINE void quantize(const __m256i *thr, const __m256i *qp, __m256i *c, - const int16_t *iscan_ptr, tran_low_t *qcoeff, - tran_low_t *dqcoeff, __m256i *eob) { - const __m256i abs_coeff = _mm256_abs_epi16(*c); - __m256i mask = _mm256_cmpgt_epi16(abs_coeff, *thr); - mask = _mm256_or_si256(mask, _mm256_cmpeq_epi16(abs_coeff, *thr)); - const int nzflag = _mm256_movemask_epi8(mask); - - if (nzflag) { - __m256i q = _mm256_adds_epi16(abs_coeff, qp[0]); - q = _mm256_mulhi_epi16(q, qp[1]); - q = _mm256_sign_epi16(q, *c); - const __m256i dq = _mm256_mullo_epi16(q, qp[2]); - - store_two_quan(q, qcoeff, dq, dqcoeff); - const __m256i zero = _mm256_setzero_si256(); - const __m256i iscan = _mm256_loadu_si256((const __m256i *)iscan_ptr); - const __m256i zero_coeff = _mm256_cmpeq_epi16(dq, zero); - const __m256i nzero_coeff = _mm256_cmpeq_epi16(zero_coeff, 
zero); - __m256i cur_eob = _mm256_sub_epi16(iscan, nzero_coeff); - cur_eob = _mm256_and_si256(cur_eob, nzero_coeff); - *eob = _mm256_max_epi16(*eob, cur_eob); - } else { - write_zero(qcoeff); - write_zero(dqcoeff); - } -} - -void av1_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, - const int16_t *zbin_ptr, const int16_t *round_ptr, - const int16_t *quant_ptr, - const int16_t *quant_shift_ptr, - tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, - const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan_ptr, const int16_t *iscan_ptr) { - (void)scan_ptr; - (void)zbin_ptr; - (void)quant_shift_ptr; - const unsigned int step = 16; - - __m256i qp[3]; - __m256i coeff, thr; - const int log_scale = 0; - - init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, &thr, qp); - read_coeff(coeff_ptr, &coeff); - - __m256i eob = _mm256_setzero_si256(); - quantize(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob); - - coeff_ptr += step; - qcoeff_ptr += step; - dqcoeff_ptr += step; - iscan_ptr += step; - n_coeffs -= step; - - update_qp(log_scale, &thr, qp); - - while (n_coeffs > 0) { - read_coeff(coeff_ptr, &coeff); - quantize(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob); - - coeff_ptr += step; - qcoeff_ptr += step; - dqcoeff_ptr += step; - iscan_ptr += step; - n_coeffs -= step; - } - *eob_ptr = quant_gather_eob(eob); -} - -static INLINE void quantize_32x32(const __m256i *thr, const __m256i *qp, - __m256i *c, const int16_t *iscan_ptr, - tran_low_t *qcoeff, tran_low_t *dqcoeff, - __m256i *eob) { - const __m256i abs_coeff = _mm256_abs_epi16(*c); - __m256i mask = _mm256_cmpgt_epi16(abs_coeff, *thr); - mask = _mm256_or_si256(mask, _mm256_cmpeq_epi16(abs_coeff, *thr)); - const int nzflag = _mm256_movemask_epi8(mask); - - if (nzflag) { - __m256i q = _mm256_adds_epi16(abs_coeff, qp[0]); - q = _mm256_mulhi_epu16(q, qp[1]); - - __m256i dq = _mm256_mullo_epi16(q, qp[2]); - dq = _mm256_srli_epi16(dq, 1); - - q = _mm256_sign_epi16(q, *c); - dq = 
_mm256_sign_epi16(dq, *c); - - store_two_quan(q, qcoeff, dq, dqcoeff); - const __m256i zero = _mm256_setzero_si256(); - const __m256i iscan = _mm256_loadu_si256((const __m256i *)iscan_ptr); - const __m256i zero_coeff = _mm256_cmpeq_epi16(dq, zero); - const __m256i nzero_coeff = _mm256_cmpeq_epi16(zero_coeff, zero); - __m256i cur_eob = _mm256_sub_epi16(iscan, nzero_coeff); - cur_eob = _mm256_and_si256(cur_eob, nzero_coeff); - *eob = _mm256_max_epi16(*eob, cur_eob); - } else { - write_zero(qcoeff); - write_zero(dqcoeff); - } -} - -void av1_quantize_fp_32x32_avx2( - const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, - const int16_t *round_ptr, const int16_t *quant_ptr, - const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan_ptr, const int16_t *iscan_ptr) { - (void)scan_ptr; - (void)zbin_ptr; - (void)quant_shift_ptr; - const unsigned int step = 16; - - __m256i qp[3]; - __m256i coeff, thr; - const int log_scale = 1; - - init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, &thr, qp); - read_coeff(coeff_ptr, &coeff); - - __m256i eob = _mm256_setzero_si256(); - quantize_32x32(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob); - - coeff_ptr += step; - qcoeff_ptr += step; - dqcoeff_ptr += step; - iscan_ptr += step; - n_coeffs -= step; - - update_qp(log_scale, &thr, qp); - - while (n_coeffs > 0) { - read_coeff(coeff_ptr, &coeff); - quantize_32x32(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob); - - coeff_ptr += step; - qcoeff_ptr += step; - dqcoeff_ptr += step; - iscan_ptr += step; - n_coeffs -= step; - } - *eob_ptr = quant_gather_eob(eob); -} - -static INLINE void quantize_64x64(const __m256i *thr, const __m256i *qp, - __m256i *c, const int16_t *iscan_ptr, - tran_low_t *qcoeff, tran_low_t *dqcoeff, - __m256i *eob) { - const __m256i abs_coeff = _mm256_abs_epi16(*c); - __m256i mask = _mm256_cmpgt_epi16(abs_coeff, *thr); - mask = 
_mm256_or_si256(mask, _mm256_cmpeq_epi16(abs_coeff, *thr)); - const int nzflag = _mm256_movemask_epi8(mask); - - if (nzflag) { - __m256i q = _mm256_adds_epi16(abs_coeff, qp[0]); - __m256i qh = _mm256_mulhi_epi16(q, qp[1]); - __m256i ql = _mm256_mullo_epi16(q, qp[1]); - qh = _mm256_slli_epi16(qh, 2); - ql = _mm256_srli_epi16(ql, 14); - q = _mm256_or_si256(qh, ql); - const __m256i dqh = _mm256_slli_epi16(_mm256_mulhi_epi16(q, qp[2]), 14); - const __m256i dql = _mm256_srli_epi16(_mm256_mullo_epi16(q, qp[2]), 2); - __m256i dq = _mm256_or_si256(dqh, dql); - - q = _mm256_sign_epi16(q, *c); - dq = _mm256_sign_epi16(dq, *c); - - store_two_quan(q, qcoeff, dq, dqcoeff); - const __m256i zero = _mm256_setzero_si256(); - const __m256i iscan = _mm256_loadu_si256((const __m256i *)iscan_ptr); - const __m256i zero_coeff = _mm256_cmpeq_epi16(dq, zero); - const __m256i nzero_coeff = _mm256_cmpeq_epi16(zero_coeff, zero); - __m256i cur_eob = _mm256_sub_epi16(iscan, nzero_coeff); - cur_eob = _mm256_and_si256(cur_eob, nzero_coeff); - *eob = _mm256_max_epi16(*eob, cur_eob); - } else { - write_zero(qcoeff); - write_zero(dqcoeff); - } -} - -void av1_quantize_fp_64x64_avx2( - const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, - const int16_t *round_ptr, const int16_t *quant_ptr, - const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan_ptr, const int16_t *iscan_ptr) { - (void)scan_ptr; - (void)zbin_ptr; - (void)quant_shift_ptr; - const unsigned int step = 16; - - __m256i qp[3]; - __m256i coeff, thr; - const int log_scale = 2; - - init_qp(round_ptr, quant_ptr, dequant_ptr, log_scale, &thr, qp); - read_coeff(coeff_ptr, &coeff); - - __m256i eob = _mm256_setzero_si256(); - quantize_64x64(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob); - - coeff_ptr += step; - qcoeff_ptr += step; - dqcoeff_ptr += step; - iscan_ptr += step; - n_coeffs -= step; - - update_qp(log_scale, 
&thr, qp); - - while (n_coeffs > 0) { - read_coeff(coeff_ptr, &coeff); - quantize_64x64(&thr, qp, &coeff, iscan_ptr, qcoeff_ptr, dqcoeff_ptr, &eob); - - coeff_ptr += step; - qcoeff_ptr += step; - dqcoeff_ptr += step; - iscan_ptr += step; - n_coeffs -= step; - } - *eob_ptr = quant_gather_eob(eob); -} diff --git a/third_party/aom/av1/encoder/x86/av1_quantize_sse2.c b/third_party/aom/av1/encoder/x86/av1_quantize_sse2.c deleted file mode 100644 index b07e7717f..000000000 --- a/third_party/aom/av1/encoder/x86/av1_quantize_sse2.c +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include -#include - -#include "config/av1_rtcd.h" - -#include "aom/aom_integer.h" - -static INLINE void read_coeff(const tran_low_t *coeff, intptr_t offset, - __m128i *c0, __m128i *c1) { - const tran_low_t *addr = coeff + offset; - if (sizeof(tran_low_t) == 4) { - const __m128i x0 = _mm_load_si128((const __m128i *)addr); - const __m128i x1 = _mm_load_si128((const __m128i *)addr + 1); - const __m128i x2 = _mm_load_si128((const __m128i *)addr + 2); - const __m128i x3 = _mm_load_si128((const __m128i *)addr + 3); - *c0 = _mm_packs_epi32(x0, x1); - *c1 = _mm_packs_epi32(x2, x3); - } else { - *c0 = _mm_load_si128((const __m128i *)addr); - *c1 = _mm_load_si128((const __m128i *)addr + 1); - } -} - -static INLINE void write_qcoeff(const __m128i *qc0, const __m128i *qc1, - tran_low_t *qcoeff, intptr_t offset) { - tran_low_t *addr = qcoeff + offset; - if (sizeof(tran_low_t) == 4) { - const __m128i zero = _mm_setzero_si128(); - __m128i sign_bits = _mm_cmplt_epi16(*qc0, zero); - __m128i y0 = _mm_unpacklo_epi16(*qc0, sign_bits); - __m128i y1 = _mm_unpackhi_epi16(*qc0, sign_bits); - _mm_store_si128((__m128i *)addr, y0); - _mm_store_si128((__m128i *)addr + 1, y1); - - sign_bits = _mm_cmplt_epi16(*qc1, zero); - y0 = _mm_unpacklo_epi16(*qc1, sign_bits); - y1 = _mm_unpackhi_epi16(*qc1, sign_bits); - _mm_store_si128((__m128i *)addr + 2, y0); - _mm_store_si128((__m128i *)addr + 3, y1); - } else { - _mm_store_si128((__m128i *)addr, *qc0); - _mm_store_si128((__m128i *)addr + 1, *qc1); - } -} - -static INLINE void write_zero(tran_low_t *qcoeff, intptr_t offset) { - const __m128i zero = _mm_setzero_si128(); - tran_low_t *addr = qcoeff + offset; - if (sizeof(tran_low_t) == 4) { - _mm_store_si128((__m128i *)addr, zero); - _mm_store_si128((__m128i *)addr + 1, zero); - _mm_store_si128((__m128i *)addr + 2, zero); - _mm_store_si128((__m128i *)addr + 3, zero); - } else { - _mm_store_si128((__m128i *)addr, zero); - _mm_store_si128((__m128i *)addr + 1, zero); - } -} - -static INLINE void 
quantize(const int16_t *iscan_ptr, - const tran_low_t *coeff_ptr, intptr_t n_coeffs, - tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, - const __m128i *round0, const __m128i *round1, - const __m128i *quant0, const __m128i *quant1, - const __m128i *dequant0, const __m128i *dequant1, - const __m128i *thr0, const __m128i *thr1, - __m128i *eob) { - __m128i coeff0, coeff1; - // Do DC and first 15 AC - read_coeff(coeff_ptr, n_coeffs, &coeff0, &coeff1); - - // Poor man's sign extract - const __m128i coeff0_sign = _mm_srai_epi16(coeff0, 15); - const __m128i coeff1_sign = _mm_srai_epi16(coeff1, 15); - __m128i qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign); - __m128i qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign); - qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); - qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); - const __m128i mask0 = _mm_or_si128(_mm_cmpgt_epi16(qcoeff0, *thr0), - _mm_cmpeq_epi16(qcoeff0, *thr0)); - const __m128i mask1 = _mm_or_si128(_mm_cmpgt_epi16(qcoeff1, *thr1), - _mm_cmpeq_epi16(qcoeff1, *thr1)); - const int16_t nzflag = _mm_movemask_epi8(mask0) | _mm_movemask_epi8(mask1); - - if (nzflag) { - qcoeff0 = _mm_adds_epi16(qcoeff0, *round0); - qcoeff1 = _mm_adds_epi16(qcoeff1, *round1); - const __m128i qtmp0 = _mm_mulhi_epi16(qcoeff0, *quant0); - const __m128i qtmp1 = _mm_mulhi_epi16(qcoeff1, *quant1); - - // Reinsert signs - qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign); - qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign); - qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); - qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); - - write_qcoeff(&qcoeff0, &qcoeff1, qcoeff_ptr, n_coeffs); - - coeff0 = _mm_mullo_epi16(qcoeff0, *dequant0); - coeff1 = _mm_mullo_epi16(qcoeff1, *dequant1); - - write_qcoeff(&coeff0, &coeff1, dqcoeff_ptr, n_coeffs); - - const __m128i zero = _mm_setzero_si128(); - // Scan for eob - const __m128i zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero); - const __m128i zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero); - const __m128i nzero_coeff0 = 
_mm_cmpeq_epi16(zero_coeff0, zero); - const __m128i nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero); - const __m128i iscan0 = - _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs)); - const __m128i iscan1 = - _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1); - // Add one to convert from indices to counts - const __m128i iscan0_nz = _mm_sub_epi16(iscan0, nzero_coeff0); - const __m128i iscan1_nz = _mm_sub_epi16(iscan1, nzero_coeff1); - const __m128i eob0 = _mm_and_si128(iscan0_nz, nzero_coeff0); - const __m128i eob1 = _mm_and_si128(iscan1_nz, nzero_coeff1); - const __m128i eob2 = _mm_max_epi16(eob0, eob1); - *eob = _mm_max_epi16(*eob, eob2); - } else { - write_zero(qcoeff_ptr, n_coeffs); - write_zero(dqcoeff_ptr, n_coeffs); - } -} - -void av1_quantize_fp_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, - const int16_t *zbin_ptr, const int16_t *round_ptr, - const int16_t *quant_ptr, - const int16_t *quant_shift_ptr, - tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, - const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan_ptr, const int16_t *iscan_ptr) { - (void)scan_ptr; - (void)zbin_ptr; - (void)quant_shift_ptr; - - coeff_ptr += n_coeffs; - iscan_ptr += n_coeffs; - qcoeff_ptr += n_coeffs; - dqcoeff_ptr += n_coeffs; - n_coeffs = -n_coeffs; - - const __m128i round0 = _mm_load_si128((const __m128i *)round_ptr); - const __m128i round1 = _mm_unpackhi_epi64(round0, round0); - const __m128i quant0 = _mm_load_si128((const __m128i *)quant_ptr); - const __m128i quant1 = _mm_unpackhi_epi64(quant0, quant0); - const __m128i dequant0 = _mm_load_si128((const __m128i *)dequant_ptr); - const __m128i dequant1 = _mm_unpackhi_epi64(dequant0, dequant0); - const __m128i thr0 = _mm_srai_epi16(dequant0, 1); - const __m128i thr1 = _mm_srai_epi16(dequant1, 1); - __m128i eob = _mm_setzero_si128(); - - quantize(iscan_ptr, coeff_ptr, n_coeffs, qcoeff_ptr, dqcoeff_ptr, &round0, - &round1, &quant0, &quant1, &dequant0, &dequant1, &thr0, &thr1, &eob); - - n_coeffs 
+= 8 * 2; - - // AC only loop - while (n_coeffs < 0) { - quantize(iscan_ptr, coeff_ptr, n_coeffs, qcoeff_ptr, dqcoeff_ptr, &round1, - &round1, &quant1, &quant1, &dequant1, &dequant1, &thr1, &thr1, - &eob); - n_coeffs += 8 * 2; - } - - // Accumulate EOB - { - __m128i eob_shuffled; - eob_shuffled = _mm_shuffle_epi32(eob, 0xe); - eob = _mm_max_epi16(eob, eob_shuffled); - eob_shuffled = _mm_shufflelo_epi16(eob, 0xe); - eob = _mm_max_epi16(eob, eob_shuffled); - eob_shuffled = _mm_shufflelo_epi16(eob, 0x1); - eob = _mm_max_epi16(eob, eob_shuffled); - *eob_ptr = _mm_extract_epi16(eob, 1); - } -} diff --git a/third_party/aom/av1/encoder/x86/av1_quantize_ssse3_x86_64.asm b/third_party/aom/av1/encoder/x86/av1_quantize_ssse3_x86_64.asm deleted file mode 100644 index ad4ae274e..000000000 --- a/third_party/aom/av1/encoder/x86/av1_quantize_ssse3_x86_64.asm +++ /dev/null @@ -1,204 +0,0 @@ -; -; Copyright (c) 2016, Alliance for Open Media. All rights reserved -; -; This source code is subject to the terms of the BSD 2 Clause License and -; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License -; was not distributed with this source code in the LICENSE file, you can -; obtain it at www.aomedia.org/license/software. If the Alliance for Open -; Media Patent License 1.0 was not distributed with this source code in the -; PATENTS file, you can obtain it at www.aomedia.org/license/patent. -; - -; - -%define private_prefix av1 - -%include "third_party/x86inc/x86inc.asm" - -SECTION_RODATA -pw_1: times 8 dw 1 - -SECTION .text - -%macro QUANTIZE_FP 2 -cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ - shift, qcoeff, dqcoeff, dequant, \ - eob, scan, iscan - cmp dword skipm, 0 - jne .blank - - ; actual quantize loop - setup pointers, rounders, etc. 
- movifnidn coeffq, coeffmp - movifnidn ncoeffq, ncoeffmp - mov r2, dequantmp - movifnidn zbinq, zbinmp - movifnidn roundq, roundmp - movifnidn quantq, quantmp - mova m1, [roundq] ; m1 = round - mova m2, [quantq] ; m2 = quant -%ifidn %1, fp_32x32 - pcmpeqw m5, m5 - psrlw m5, 15 - paddw m1, m5 - psrlw m1, 1 ; m1 = (m1 + 1) / 2 -%endif - mova m3, [r2q] ; m3 = dequant - mov r3, qcoeffmp - mov r4, dqcoeffmp - mov r5, iscanmp -%ifidn %1, fp_32x32 - psllw m2, 1 -%endif - pxor m5, m5 ; m5 = dedicated zero - - lea coeffq, [ coeffq+ncoeffq*2] - lea r5q, [ r5q+ncoeffq*2] - lea r3q, [ r3q+ncoeffq*2] - lea r4q, [r4q+ncoeffq*2] - neg ncoeffq - - ; get DC and first 15 AC coeffs - mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i] - mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i] - pabsw m6, m9 ; m6 = abs(m9) - pabsw m11, m10 ; m11 = abs(m10) - pcmpeqw m7, m7 - - paddsw m6, m1 ; m6 += round - punpckhqdq m1, m1 - paddsw m11, m1 ; m11 += round - pmulhw m8, m6, m2 ; m8 = m6*q>>16 - punpckhqdq m2, m2 - pmulhw m13, m11, m2 ; m13 = m11*q>>16 - psignw m8, m9 ; m8 = reinsert sign - psignw m13, m10 ; m13 = reinsert sign - mova [r3q+ncoeffq*2+ 0], m8 - mova [r3q+ncoeffq*2+16], m13 -%ifidn %1, fp_32x32 - pabsw m8, m8 - pabsw m13, m13 -%endif - pmullw m8, m3 ; r4[i] = r3[i] * q - punpckhqdq m3, m3 - pmullw m13, m3 ; r4[i] = r3[i] * q -%ifidn %1, fp_32x32 - psrlw m8, 1 - psrlw m13, 1 - psignw m8, m9 - psignw m13, m10 - psrlw m0, m3, 2 -%else - psrlw m0, m3, 1 -%endif - mova [r4q+ncoeffq*2+ 0], m8 - mova [r4q+ncoeffq*2+16], m13 - pcmpeqw m8, m5 ; m8 = c[i] == 0 - pcmpeqw m13, m5 ; m13 = c[i] == 0 - mova m6, [ r5q+ncoeffq*2+ 0] ; m6 = scan[i] - mova m11, [ r5q+ncoeffq*2+16] ; m11 = scan[i] - psubw m6, m7 ; m6 = scan[i] + 1 - psubw m11, m7 ; m11 = scan[i] + 1 - pandn m8, m6 ; m8 = max(eob) - pandn m13, m11 ; m13 = max(eob) - pmaxsw m8, m13 - add ncoeffq, mmsize - jz .accumulate_eob - -.ac_only_loop: - mova m9, [ coeffq+ncoeffq*2+ 0] ; m9 = c[i] - mova m10, [ coeffq+ncoeffq*2+16] ; m10 = c[i] - pabsw m6, 
m9 ; m6 = abs(m9) - pabsw m11, m10 ; m11 = abs(m10) - - pcmpgtw m7, m6, m0 - pcmpgtw m12, m11, m0 - pmovmskb r6d, m7 - pmovmskb r2d, m12 - - or r6, r2 - jz .skip_iter - - pcmpeqw m7, m7 - - paddsw m6, m1 ; m6 += round - paddsw m11, m1 ; m11 += round - pmulhw m14, m6, m2 ; m14 = m6*q>>16 - pmulhw m13, m11, m2 ; m13 = m11*q>>16 - psignw m14, m9 ; m14 = reinsert sign - psignw m13, m10 ; m13 = reinsert sign - mova [r3q+ncoeffq*2+ 0], m14 - mova [r3q+ncoeffq*2+16], m13 -%ifidn %1, fp_32x32 - pabsw m14, m14 - pabsw m13, m13 -%endif - pmullw m14, m3 ; r4[i] = r3[i] * q - pmullw m13, m3 ; r4[i] = r3[i] * q -%ifidn %1, fp_32x32 - psrlw m14, 1 - psrlw m13, 1 - psignw m14, m9 - psignw m13, m10 -%endif - mova [r4q+ncoeffq*2+ 0], m14 - mova [r4q+ncoeffq*2+16], m13 - pcmpeqw m14, m5 ; m14 = c[i] == 0 - pcmpeqw m13, m5 ; m13 = c[i] == 0 - mova m6, [ r5q+ncoeffq*2+ 0] ; m6 = scan[i] - mova m11, [ r5q+ncoeffq*2+16] ; m11 = scan[i] - psubw m6, m7 ; m6 = scan[i] + 1 - psubw m11, m7 ; m11 = scan[i] + 1 - pandn m14, m6 ; m14 = max(eob) - pandn m13, m11 ; m13 = max(eob) - pmaxsw m8, m14 - pmaxsw m8, m13 - add ncoeffq, mmsize - jl .ac_only_loop - - jmp .accumulate_eob -.skip_iter: - mova [r3q+ncoeffq*2+ 0], m5 - mova [r3q+ncoeffq*2+16], m5 - mova [r4q+ncoeffq*2+ 0], m5 - mova [r4q+ncoeffq*2+16], m5 - add ncoeffq, mmsize - jl .ac_only_loop - -.accumulate_eob: - ; horizontally accumulate/max eobs and write into [eob] memory pointer - mov r2, eobmp - pshufd m7, m8, 0xe - pmaxsw m8, m7 - pshuflw m7, m8, 0xe - pmaxsw m8, m7 - pshuflw m7, m8, 0x1 - pmaxsw m8, m7 - pextrw r6, m8, 0 - mov [r2], r6 - RET - - ; skip-block, i.e. 
just write all zeroes -.blank: - mov r0, dqcoeffmp - movifnidn ncoeffq, ncoeffmp - mov r2, qcoeffmp - mov r3, eobmp - - lea r0q, [r0q+ncoeffq*2] - lea r2q, [r2q+ncoeffq*2] - neg ncoeffq - pxor m7, m7 -.blank_loop: - mova [r0q+ncoeffq*2+ 0], m7 - mova [r0q+ncoeffq*2+16], m7 - mova [r2q+ncoeffq*2+ 0], m7 - mova [r2q+ncoeffq*2+16], m7 - add ncoeffq, mmsize - jl .blank_loop - mov word [r3q], 0 - RET -%endmacro - -INIT_XMM ssse3 -QUANTIZE_FP fp, 7 -QUANTIZE_FP fp_32x32, 7 diff --git a/third_party/aom/av1/encoder/x86/av1_ssim_opt_x86_64.asm b/third_party/aom/av1/encoder/x86/av1_ssim_opt_x86_64.asm deleted file mode 100644 index faa2a232a..000000000 --- a/third_party/aom/av1/encoder/x86/av1_ssim_opt_x86_64.asm +++ /dev/null @@ -1,222 +0,0 @@ -; -; Copyright (c) 2016, Alliance for Open Media. All rights reserved -; -; This source code is subject to the terms of the BSD 2 Clause License and -; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License -; was not distributed with this source code in the LICENSE file, you can -; obtain it at www.aomedia.org/license/software. If the Alliance for Open -; Media Patent License 1.0 was not distributed with this source code in the -; PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
-; - -; - -%include "aom_ports/x86_abi_support.asm" - -; tabulate_ssim - sums sum_s,sum_r,sum_sq_s,sum_sq_r, sum_sxr -%macro TABULATE_SSIM 0 - paddusw xmm15, xmm3 ; sum_s - paddusw xmm14, xmm4 ; sum_r - movdqa xmm1, xmm3 - pmaddwd xmm1, xmm1 - paddd xmm13, xmm1 ; sum_sq_s - movdqa xmm2, xmm4 - pmaddwd xmm2, xmm2 - paddd xmm12, xmm2 ; sum_sq_r - pmaddwd xmm3, xmm4 - paddd xmm11, xmm3 ; sum_sxr -%endmacro - -; Sum across the register %1 starting with q words -%macro SUM_ACROSS_Q 1 - movdqa xmm2,%1 - punpckldq %1,xmm0 - punpckhdq xmm2,xmm0 - paddq %1,xmm2 - movdqa xmm2,%1 - punpcklqdq %1,xmm0 - punpckhqdq xmm2,xmm0 - paddq %1,xmm2 -%endmacro - -; Sum across the register %1 starting with q words -%macro SUM_ACROSS_W 1 - movdqa xmm1, %1 - punpcklwd %1,xmm0 - punpckhwd xmm1,xmm0 - paddd %1, xmm1 - SUM_ACROSS_Q %1 -%endmacro - -SECTION .text - -;void ssim_parms_sse2( -; unsigned char *s, -; int sp, -; unsigned char *r, -; int rp -; unsigned long *sum_s, -; unsigned long *sum_r, -; unsigned long *sum_sq_s, -; unsigned long *sum_sq_r, -; unsigned long *sum_sxr); -; -; TODO: Use parm passing through structure, probably don't need the pxors -; ( calling app will initialize to 0 ) could easily fit everything in sse2 -; without too much hastle, and can probably do better estimates with psadw -; or pavgb At this point this is just meant to be first pass for calculating -; all the parms needed for 16x16 ssim so we can play with dssim as distortion -; in mode selection code. 
-global sym(av1_ssim_parms_16x16_sse2) PRIVATE -sym(av1_ssim_parms_16x16_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 9 - SAVE_XMM 15 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;s - mov rcx, arg(1) ;sp - mov rdi, arg(2) ;r - mov rax, arg(3) ;rp - - pxor xmm0, xmm0 - pxor xmm15,xmm15 ;sum_s - pxor xmm14,xmm14 ;sum_r - pxor xmm13,xmm13 ;sum_sq_s - pxor xmm12,xmm12 ;sum_sq_r - pxor xmm11,xmm11 ;sum_sxr - - mov rdx, 16 ;row counter -.NextRow: - - ;grab source and reference pixels - movdqu xmm5, [rsi] - movdqu xmm6, [rdi] - movdqa xmm3, xmm5 - movdqa xmm4, xmm6 - punpckhbw xmm3, xmm0 ; high_s - punpckhbw xmm4, xmm0 ; high_r - - TABULATE_SSIM - - movdqa xmm3, xmm5 - movdqa xmm4, xmm6 - punpcklbw xmm3, xmm0 ; low_s - punpcklbw xmm4, xmm0 ; low_r - - TABULATE_SSIM - - add rsi, rcx ; next s row - add rdi, rax ; next r row - - dec rdx ; counter - jnz .NextRow - - SUM_ACROSS_W xmm15 - SUM_ACROSS_W xmm14 - SUM_ACROSS_Q xmm13 - SUM_ACROSS_Q xmm12 - SUM_ACROSS_Q xmm11 - - mov rdi,arg(4) - movd [rdi], xmm15; - mov rdi,arg(5) - movd [rdi], xmm14; - mov rdi,arg(6) - movd [rdi], xmm13; - mov rdi,arg(7) - movd [rdi], xmm12; - mov rdi,arg(8) - movd [rdi], xmm11; - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void ssim_parms_sse2( -; unsigned char *s, -; int sp, -; unsigned char *r, -; int rp -; unsigned long *sum_s, -; unsigned long *sum_r, -; unsigned long *sum_sq_s, -; unsigned long *sum_sq_r, -; unsigned long *sum_sxr); -; -; TODO: Use parm passing through structure, probably don't need the pxors -; ( calling app will initialize to 0 ) could easily fit everything in sse2 -; without too much hastle, and can probably do better estimates with psadw -; or pavgb At this point this is just meant to be first pass for calculating -; all the parms needed for 16x16 ssim so we can play with dssim as distortion -; in mode selection code. 
-global sym(av1_ssim_parms_8x8_sse2) PRIVATE -sym(av1_ssim_parms_8x8_sse2): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 9 - SAVE_XMM 15 - push rsi - push rdi - ; end prolog - - mov rsi, arg(0) ;s - mov rcx, arg(1) ;sp - mov rdi, arg(2) ;r - mov rax, arg(3) ;rp - - pxor xmm0, xmm0 - pxor xmm15,xmm15 ;sum_s - pxor xmm14,xmm14 ;sum_r - pxor xmm13,xmm13 ;sum_sq_s - pxor xmm12,xmm12 ;sum_sq_r - pxor xmm11,xmm11 ;sum_sxr - - mov rdx, 8 ;row counter -.NextRow: - - ;grab source and reference pixels - movq xmm3, [rsi] - movq xmm4, [rdi] - punpcklbw xmm3, xmm0 ; low_s - punpcklbw xmm4, xmm0 ; low_r - - TABULATE_SSIM - - add rsi, rcx ; next s row - add rdi, rax ; next r row - - dec rdx ; counter - jnz .NextRow - - SUM_ACROSS_W xmm15 - SUM_ACROSS_W xmm14 - SUM_ACROSS_Q xmm13 - SUM_ACROSS_Q xmm12 - SUM_ACROSS_Q xmm11 - - mov rdi,arg(4) - movd [rdi], xmm15; - mov rdi,arg(5) - movd [rdi], xmm14; - mov rdi,arg(6) - movd [rdi], xmm13; - mov rdi,arg(7) - movd [rdi], xmm12; - mov rdi,arg(8) - movd [rdi], xmm11; - - ; begin epilog - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret diff --git a/third_party/aom/av1/encoder/x86/av1_txfm1d_sse4.h b/third_party/aom/av1/encoder/x86/av1_txfm1d_sse4.h deleted file mode 100644 index 6df2a8bdb..000000000 --- a/third_party/aom/av1/encoder/x86/av1_txfm1d_sse4.h +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#ifndef AOM_AV1_ENCODER_X86_AV1_TXFM1D_SSE4_H_ -#define AOM_AV1_ENCODER_X86_AV1_TXFM1D_SSE4_H_ - -#include -#include "av1/common/av1_txfm.h" -#include "av1/common/x86/av1_txfm_sse4.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void av1_fdct4_new_sse4_1(const __m128i *input, __m128i *output, - const int8_t cos_bit, const int8_t *stage_range); -void av1_fdct8_new_sse4_1(const __m128i *input, __m128i *output, - const int8_t cos_bit, const int8_t *stage_range); -void av1_fdct16_new_sse4_1(const __m128i *input, __m128i *output, - const int8_t cos_bit, const int8_t *stage_range); -void av1_fdct32_new_sse4_1(const __m128i *input, __m128i *output, - int8_t cos_bit); -void av1_fdct64_new_sse4_1(const __m128i *input, __m128i *output, - int8_t cos_bit, const int instride, - const int outstride); - -void av1_fadst4_new_sse4_1(const __m128i *input, __m128i *output, - const int8_t cos_bit, const int8_t *stage_range); -void av1_fadst8_new_sse4_1(const __m128i *input, __m128i *output, - const int8_t cos_bit, const int8_t *stage_range); -void av1_fadst16_new_sse4_1(const __m128i *input, __m128i *output, - const int8_t cos_bit, const int8_t *stage_range); - -void av1_idct4_new_sse4_1(const __m128i *input, __m128i *output, - const int8_t cos_bit, const int8_t *stage_range); -void av1_idct8_new_sse4_1(const __m128i *input, __m128i *output, - const int8_t cos_bit, const int8_t *stage_range); -void av1_idct16_new_sse4_1(const __m128i *input, __m128i *output, - const int8_t cos_bit, const int8_t *stage_range); -void av1_idct32_new_sse4_1(const __m128i *input, __m128i *output, - const int8_t cos_bit, const int8_t *stage_range); -void av1_idct64_new_sse4_1(const __m128i *input, __m128i *output, - const int8_t cos_bit, const int8_t *stage_range); - -void av1_iadst4_new_sse4_1(const __m128i *input, __m128i *output, - const int8_t cos_bit, const int8_t *stage_range); -void av1_iadst8_new_sse4_1(const __m128i *input, __m128i *output, - const int8_t cos_bit, const int8_t 
*stage_range); -void av1_iadst16_new_sse4_1(const __m128i *input, __m128i *output, - const int8_t cos_bit, const int8_t *stage_range); -static INLINE void transpose_32_4x4(int stride, const __m128i *input, - __m128i *output) { - __m128i temp0 = _mm_unpacklo_epi32(input[0 * stride], input[2 * stride]); - __m128i temp1 = _mm_unpackhi_epi32(input[0 * stride], input[2 * stride]); - __m128i temp2 = _mm_unpacklo_epi32(input[1 * stride], input[3 * stride]); - __m128i temp3 = _mm_unpackhi_epi32(input[1 * stride], input[3 * stride]); - - output[0 * stride] = _mm_unpacklo_epi32(temp0, temp2); - output[1 * stride] = _mm_unpackhi_epi32(temp0, temp2); - output[2 * stride] = _mm_unpacklo_epi32(temp1, temp3); - output[3 * stride] = _mm_unpackhi_epi32(temp1, temp3); -} - -// the entire input block can be represent by a grid of 4x4 blocks -// each 4x4 blocks can be represent by 4 vertical __m128i -// we first transpose each 4x4 block internally -// then transpose the grid -static INLINE void transpose_32(int txfm_size, const __m128i *input, - __m128i *output) { - const int num_per_128 = 4; - const int row_size = txfm_size; - const int col_size = txfm_size / num_per_128; - int r, c; - - // transpose each 4x4 block internally - for (r = 0; r < row_size; r += 4) { - for (c = 0; c < col_size; c++) { - transpose_32_4x4(col_size, &input[r * col_size + c], - &output[c * 4 * col_size + r / 4]); - } - } -} - -// out0 = in0*w0 + in1*w1 -// out1 = -in1*w0 + in0*w1 -#define btf_32_sse4_1_type0(w0, w1, in0, in1, out0, out1, bit) \ - do { \ - const __m128i ww0 = _mm_set1_epi32(w0); \ - const __m128i ww1 = _mm_set1_epi32(w1); \ - const __m128i in0_w0 = _mm_mullo_epi32(in0, ww0); \ - const __m128i in1_w1 = _mm_mullo_epi32(in1, ww1); \ - out0 = _mm_add_epi32(in0_w0, in1_w1); \ - out0 = av1_round_shift_32_sse4_1(out0, bit); \ - const __m128i in0_w1 = _mm_mullo_epi32(in0, ww1); \ - const __m128i in1_w0 = _mm_mullo_epi32(in1, ww0); \ - out1 = _mm_sub_epi32(in0_w1, in1_w0); \ - out1 = 
av1_round_shift_32_sse4_1(out1, bit); \ - } while (0) - -// out0 = in0*w0 + in1*w1 -// out1 = in1*w0 - in0*w1 -#define btf_32_sse4_1_type1(w0, w1, in0, in1, out0, out1, bit) \ - do { \ - btf_32_sse4_1_type0(w1, w0, in1, in0, out0, out1, bit); \ - } while (0) - -// out0 = in0*w0 + in1*w1 -// out1 = -in1*w0 + in0*w1 -#define btf_32_type0_sse4_1_new(ww0, ww1, in0, in1, out0, out1, r, bit) \ - do { \ - const __m128i in0_w0 = _mm_mullo_epi32(in0, ww0); \ - const __m128i in1_w1 = _mm_mullo_epi32(in1, ww1); \ - out0 = _mm_add_epi32(in0_w0, in1_w1); \ - out0 = _mm_add_epi32(out0, r); \ - out0 = _mm_srai_epi32(out0, bit); \ - const __m128i in0_w1 = _mm_mullo_epi32(in0, ww1); \ - const __m128i in1_w0 = _mm_mullo_epi32(in1, ww0); \ - out1 = _mm_sub_epi32(in0_w1, in1_w0); \ - out1 = _mm_add_epi32(out1, r); \ - out1 = _mm_srai_epi32(out1, bit); \ - } while (0) - -// out0 = in0*w0 + in1*w1 -// out1 = in1*w0 - in0*w1 -#define btf_32_type1_sse4_1_new(ww0, ww1, in0, in1, out0, out1, r, bit) \ - do { \ - btf_32_type0_sse4_1_new(ww1, ww0, in1, in0, out0, out1, r, bit); \ - } while (0) - -#ifdef __cplusplus -} -#endif - -#endif // AOM_AV1_ENCODER_X86_AV1_TXFM1D_SSE4_H_ diff --git a/third_party/aom/av1/encoder/x86/corner_match_sse4.c b/third_party/aom/av1/encoder/x86/corner_match_sse4.c deleted file mode 100644 index 93f37b71d..000000000 --- a/third_party/aom/av1/encoder/x86/corner_match_sse4.c +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include -#include -#include -#include - -#include - -#include "config/av1_rtcd.h" - -#include "aom_ports/mem.h" -#include "av1/encoder/corner_match.h" - -DECLARE_ALIGNED(16, static const uint8_t, byte_mask[16]) = { - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0 -}; -#if MATCH_SZ != 13 -#error "Need to change byte_mask in corner_match_sse4.c if MATCH_SZ != 13" -#endif - -/* Compute corr(im1, im2) * MATCH_SZ * stddev(im1), where the - correlation/standard deviation are taken over MATCH_SZ by MATCH_SZ windows - of each image, centered at (x1, y1) and (x2, y2) respectively. -*/ -double compute_cross_correlation_sse4_1(unsigned char *im1, int stride1, int x1, - int y1, unsigned char *im2, int stride2, - int x2, int y2) { - int i; - // 2 16-bit partial sums in lanes 0, 4 (== 2 32-bit partial sums in lanes 0, - // 2) - __m128i sum1_vec = _mm_setzero_si128(); - __m128i sum2_vec = _mm_setzero_si128(); - // 4 32-bit partial sums of squares - __m128i sumsq2_vec = _mm_setzero_si128(); - __m128i cross_vec = _mm_setzero_si128(); - - const __m128i mask = _mm_load_si128((__m128i *)byte_mask); - const __m128i zero = _mm_setzero_si128(); - - im1 += (y1 - MATCH_SZ_BY2) * stride1 + (x1 - MATCH_SZ_BY2); - im2 += (y2 - MATCH_SZ_BY2) * stride2 + (x2 - MATCH_SZ_BY2); - - for (i = 0; i < MATCH_SZ; ++i) { - const __m128i v1 = - _mm_and_si128(_mm_loadu_si128((__m128i *)&im1[i * stride1]), mask); - const __m128i v2 = - _mm_and_si128(_mm_loadu_si128((__m128i *)&im2[i * stride2]), mask); - - // Using the 'sad' intrinsic here is a bit faster than adding - // v1_l + v1_r and v2_l + v2_r, plus it avoids the need for a 16->32 bit - // conversion step later, for a net speedup of ~10% - sum1_vec = _mm_add_epi16(sum1_vec, _mm_sad_epu8(v1, zero)); - sum2_vec = _mm_add_epi16(sum2_vec, _mm_sad_epu8(v2, zero)); - - const __m128i v1_l = _mm_cvtepu8_epi16(v1); - const __m128i v1_r = _mm_cvtepu8_epi16(_mm_srli_si128(v1, 8)); - const __m128i v2_l = _mm_cvtepu8_epi16(v2); - 
const __m128i v2_r = _mm_cvtepu8_epi16(_mm_srli_si128(v2, 8)); - - sumsq2_vec = _mm_add_epi32( - sumsq2_vec, - _mm_add_epi32(_mm_madd_epi16(v2_l, v2_l), _mm_madd_epi16(v2_r, v2_r))); - cross_vec = _mm_add_epi32( - cross_vec, - _mm_add_epi32(_mm_madd_epi16(v1_l, v2_l), _mm_madd_epi16(v1_r, v2_r))); - } - - // Now we can treat the four registers (sum1_vec, sum2_vec, sumsq2_vec, - // cross_vec) - // as holding 4 32-bit elements each, which we want to sum horizontally. - // We do this by transposing and then summing vertically. - __m128i tmp_0 = _mm_unpacklo_epi32(sum1_vec, sum2_vec); - __m128i tmp_1 = _mm_unpackhi_epi32(sum1_vec, sum2_vec); - __m128i tmp_2 = _mm_unpacklo_epi32(sumsq2_vec, cross_vec); - __m128i tmp_3 = _mm_unpackhi_epi32(sumsq2_vec, cross_vec); - - __m128i tmp_4 = _mm_unpacklo_epi64(tmp_0, tmp_2); - __m128i tmp_5 = _mm_unpackhi_epi64(tmp_0, tmp_2); - __m128i tmp_6 = _mm_unpacklo_epi64(tmp_1, tmp_3); - __m128i tmp_7 = _mm_unpackhi_epi64(tmp_1, tmp_3); - - __m128i res = - _mm_add_epi32(_mm_add_epi32(tmp_4, tmp_5), _mm_add_epi32(tmp_6, tmp_7)); - - int sum1 = _mm_extract_epi32(res, 0); - int sum2 = _mm_extract_epi32(res, 1); - int sumsq2 = _mm_extract_epi32(res, 2); - int cross = _mm_extract_epi32(res, 3); - - int var2 = sumsq2 * MATCH_SZ_SQ - sum2 * sum2; - int cov = cross * MATCH_SZ_SQ - sum1 * sum2; - return cov / sqrt((double)var2); -} diff --git a/third_party/aom/av1/encoder/x86/dct_sse2.asm b/third_party/aom/av1/encoder/x86/dct_sse2.asm deleted file mode 100644 index b18554818..000000000 --- a/third_party/aom/av1/encoder/x86/dct_sse2.asm +++ /dev/null @@ -1,82 +0,0 @@ -; -; Copyright (c) 2016, Alliance for Open Media. All rights reserved -; -; This source code is subject to the terms of the BSD 2 Clause License and -; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License -; was not distributed with this source code in the LICENSE file, you can -; obtain it at www.aomedia.org/license/software. 
If the Alliance for Open -; Media Patent License 1.0 was not distributed with this source code in the -; PATENTS file, you can obtain it at www.aomedia.org/license/patent. -; - -%define private_prefix av1 - -%include "third_party/x86inc/x86inc.asm" - -SECTION .text - -%macro TRANSFORM_COLS 0 - paddw m0, m1 - movq m4, m0 - psubw m3, m2 - psubw m4, m3 - psraw m4, 1 - movq m5, m4 - psubw m5, m1 ;b1 - psubw m4, m2 ;c1 - psubw m0, m4 - paddw m3, m5 - ; m0 a0 - SWAP 1, 4 ; m1 c1 - SWAP 2, 3 ; m2 d1 - SWAP 3, 5 ; m3 b1 -%endmacro - -%macro TRANSPOSE_4X4 0 - ; 00 01 02 03 - ; 10 11 12 13 - ; 20 21 22 23 - ; 30 31 32 33 - punpcklwd m0, m1 ; 00 10 01 11 02 12 03 13 - punpcklwd m2, m3 ; 20 30 21 31 22 32 23 33 - mova m1, m0 - punpckldq m0, m2 ; 00 10 20 30 01 11 21 31 - punpckhdq m1, m2 ; 02 12 22 32 03 13 23 33 -%endmacro - -INIT_XMM sse2 -cglobal fwht4x4, 3, 4, 8, input, output, stride - lea r3q, [inputq + strideq*4] - movq m0, [inputq] ;a1 - movq m1, [inputq + strideq*2] ;b1 - movq m2, [r3q] ;c1 - movq m3, [r3q + strideq*2] ;d1 - - TRANSFORM_COLS - TRANSPOSE_4X4 - SWAP 1, 2 - psrldq m1, m0, 8 - psrldq m3, m2, 8 - TRANSFORM_COLS - TRANSPOSE_4X4 - - psllw m0, 2 - psllw m1, 2 - - ; sign extension - mova m2, m0 - mova m3, m1 - punpcklwd m0, m0 - punpcklwd m1, m1 - punpckhwd m2, m2 - punpckhwd m3, m3 - psrad m0, 16 - psrad m1, 16 - psrad m2, 16 - psrad m3, 16 - mova [outputq], m0 - mova [outputq + 16], m2 - mova [outputq + 32], m1 - mova [outputq + 48], m3 - - RET diff --git a/third_party/aom/av1/encoder/x86/encodetxb_avx2.c b/third_party/aom/av1/encoder/x86/encodetxb_avx2.c deleted file mode 100644 index 7642f57d1..000000000 --- a/third_party/aom/av1/encoder/x86/encodetxb_avx2.c +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include -#include // SSE2 -#include /* SSE4.1 */ -#include /* AVX2 */ - -#include "aom/aom_integer.h" -#include "aom_dsp/x86/mem_sse2.h" -#include "av1/common/onyxc_int.h" -#include "av1/common/txb_common.h" -#include "aom_dsp/x86/synonyms.h" -#include "aom_dsp/x86/synonyms_avx2.h" - -void av1_txb_init_levels_avx2(const tran_low_t *const coeff, const int width, - const int height, uint8_t *const levels) { - const int stride = width + TX_PAD_HOR; - const __m256i y_zeros = _mm256_setzero_si256(); - - const int32_t pre_len = sizeof(*levels) * TX_PAD_TOP * stride; - uint8_t *pre_buf = levels - TX_PAD_TOP * stride; - uint8_t *pre_buf_end = pre_buf + pre_len; - do { - yy_storeu_256(pre_buf, y_zeros); - pre_buf += 32; - } while (pre_buf < pre_buf_end); - - const int32_t bottom_len = sizeof(*levels) * (TX_PAD_BOTTOM * stride); - uint8_t *bottom_buf_end = levels + (height + TX_PAD_BOTTOM) * stride; - uint8_t *bottom_buf = bottom_buf_end - ((bottom_len + 31) & (~31)); - - do { - yy_storeu_256(bottom_buf, y_zeros); - bottom_buf += 32; - } while (bottom_buf < bottom_buf_end); - - int i = 0; - uint8_t *ls = levels; - const tran_low_t *cf = coeff; - if (width == 4) { - do { - const __m256i c0 = yy_loadu_256(cf); - const __m256i c1 = yy_loadu_256(cf + 8); - const __m256i abs01 = _mm256_abs_epi16(_mm256_packs_epi32(c0, c1)); - const __m256i abs01_8 = _mm256_packs_epi16(abs01, y_zeros); - const __m256i res_ = _mm256_shuffle_epi32(abs01_8, 0xd8); - const __m256i res = _mm256_permute4x64_epi64(res_, 0xd8); - yy_storeu_256(ls, res); - ls += 32; - cf += 16; - i += 4; - } while (i < height); - } else if (width == 8) { - do { - const __m256i coeffA = 
yy_loadu_256(cf); - const __m256i coeffB = yy_loadu_256(cf + 8); - const __m256i coeffC = yy_loadu_256(cf + 16); - const __m256i coeffD = yy_loadu_256(cf + 24); - const __m256i coeffAB = _mm256_packs_epi32(coeffA, coeffB); - const __m256i coeffCD = _mm256_packs_epi32(coeffC, coeffD); - const __m256i absAB = _mm256_abs_epi16(coeffAB); - const __m256i absCD = _mm256_abs_epi16(coeffCD); - const __m256i absABCD = _mm256_packs_epi16(absAB, absCD); - const __m256i res_ = _mm256_permute4x64_epi64(absABCD, 0xd8); - const __m256i res = _mm256_shuffle_epi32(res_, 0xd8); - const __m128i res0 = _mm256_castsi256_si128(res); - const __m128i res1 = _mm256_extracti128_si256(res, 1); - xx_storel_64(ls, res0); - *(int32_t *)(ls + width) = 0; - xx_storel_64(ls + stride, _mm_srli_si128(res0, 8)); - *(int32_t *)(ls + width + stride) = 0; - xx_storel_64(ls + stride * 2, res1); - *(int32_t *)(ls + width + stride * 2) = 0; - xx_storel_64(ls + stride * 3, _mm_srli_si128(res1, 8)); - *(int32_t *)(ls + width + stride * 3) = 0; - cf += 32; - ls += stride << 2; - i += 4; - } while (i < height); - } else if (width == 16) { - do { - const __m256i coeffA = yy_loadu_256(cf); - const __m256i coeffB = yy_loadu_256(cf + 8); - const __m256i coeffC = yy_loadu_256(cf + 16); - const __m256i coeffD = yy_loadu_256(cf + 24); - const __m256i coeffAB = _mm256_packs_epi32(coeffA, coeffB); - const __m256i coeffCD = _mm256_packs_epi32(coeffC, coeffD); - const __m256i absAB = _mm256_abs_epi16(coeffAB); - const __m256i absCD = _mm256_abs_epi16(coeffCD); - const __m256i absABCD = _mm256_packs_epi16(absAB, absCD); - const __m256i res_ = _mm256_permute4x64_epi64(absABCD, 0xd8); - const __m256i res = _mm256_shuffle_epi32(res_, 0xd8); - xx_storeu_128(ls, _mm256_castsi256_si128(res)); - xx_storeu_128(ls + stride, _mm256_extracti128_si256(res, 1)); - cf += 32; - *(int32_t *)(ls + width) = 0; - *(int32_t *)(ls + stride + width) = 0; - ls += stride << 1; - i += 2; - } while (i < height); - } else { - do { - const __m256i 
coeffA = yy_loadu_256(cf); - const __m256i coeffB = yy_loadu_256(cf + 8); - const __m256i coeffC = yy_loadu_256(cf + 16); - const __m256i coeffD = yy_loadu_256(cf + 24); - const __m256i coeffAB = _mm256_packs_epi32(coeffA, coeffB); - const __m256i coeffCD = _mm256_packs_epi32(coeffC, coeffD); - const __m256i absAB = _mm256_abs_epi16(coeffAB); - const __m256i absCD = _mm256_abs_epi16(coeffCD); - const __m256i absABCD = _mm256_packs_epi16(absAB, absCD); - const __m256i res_ = _mm256_permute4x64_epi64(absABCD, 0xd8); - const __m256i res = _mm256_shuffle_epi32(res_, 0xd8); - yy_storeu_256(ls, res); - cf += 32; - *(int32_t *)(ls + width) = 0; - ls += stride; - i += 1; - } while (i < height); - } -} diff --git a/third_party/aom/av1/encoder/x86/encodetxb_sse2.c b/third_party/aom/av1/encoder/x86/encodetxb_sse2.c deleted file mode 100644 index dedb4d02f..000000000 --- a/third_party/aom/av1/encoder/x86/encodetxb_sse2.c +++ /dev/null @@ -1,505 +0,0 @@ -/* - * Copyright (c) 2017, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include -#include // SSE2 - -#include "aom/aom_integer.h" -#include "aom_dsp/x86/mem_sse2.h" -#include "av1/common/onyxc_int.h" -#include "av1/common/txb_common.h" - -static INLINE void load_levels_4x4x5_sse2(const uint8_t *const src, - const int stride, - const ptrdiff_t *const offsets, - __m128i *const level) { - level[0] = load_8bit_4x4_to_1_reg_sse2(src + 1, stride); - level[1] = load_8bit_4x4_to_1_reg_sse2(src + stride, stride); - level[2] = load_8bit_4x4_to_1_reg_sse2(src + offsets[0], stride); - level[3] = load_8bit_4x4_to_1_reg_sse2(src + offsets[1], stride); - level[4] = load_8bit_4x4_to_1_reg_sse2(src + offsets[2], stride); -} - -static INLINE void load_levels_8x2x5_sse2(const uint8_t *const src, - const int stride, - const ptrdiff_t *const offsets, - __m128i *const level) { - level[0] = load_8bit_8x2_to_1_reg_sse2(src + 1, stride); - level[1] = load_8bit_8x2_to_1_reg_sse2(src + stride, stride); - level[2] = load_8bit_8x2_to_1_reg_sse2(src + offsets[0], stride); - level[3] = load_8bit_8x2_to_1_reg_sse2(src + offsets[1], stride); - level[4] = load_8bit_8x2_to_1_reg_sse2(src + offsets[2], stride); -} - -static INLINE void load_levels_16x1x5_sse2(const uint8_t *const src, - const int stride, - const ptrdiff_t *const offsets, - __m128i *const level) { - level[0] = _mm_loadu_si128((__m128i *)(src + 1)); - level[1] = _mm_loadu_si128((__m128i *)(src + stride)); - level[2] = _mm_loadu_si128((__m128i *)(src + offsets[0])); - level[3] = _mm_loadu_si128((__m128i *)(src + offsets[1])); - level[4] = _mm_loadu_si128((__m128i *)(src + offsets[2])); -} - -static INLINE __m128i get_coeff_contexts_kernel_sse2(__m128i *const level) { - const __m128i const_3 = _mm_set1_epi8(3); - const __m128i const_4 = _mm_set1_epi8(4); - __m128i count; - - count = _mm_min_epu8(level[0], const_3); - level[1] = _mm_min_epu8(level[1], const_3); - level[2] = _mm_min_epu8(level[2], const_3); - level[3] = _mm_min_epu8(level[3], const_3); - level[4] = _mm_min_epu8(level[4], const_3); - 
count = _mm_add_epi8(count, level[1]); - count = _mm_add_epi8(count, level[2]); - count = _mm_add_epi8(count, level[3]); - count = _mm_add_epi8(count, level[4]); - count = _mm_avg_epu8(count, _mm_setzero_si128()); - count = _mm_min_epu8(count, const_4); - return count; -} - -static INLINE void get_4_nz_map_contexts_2d(const uint8_t *levels, - const int height, - const ptrdiff_t *const offsets, - int8_t *const coeff_contexts) { - const int stride = 4 + TX_PAD_HOR; - const __m128i pos_to_offset_large = _mm_set1_epi8(21); - __m128i pos_to_offset = - (height == 4) - ? _mm_setr_epi8(0, 1, 6, 6, 1, 6, 6, 21, 6, 6, 21, 21, 6, 21, 21, 21) - : _mm_setr_epi8(0, 11, 11, 11, 11, 11, 11, 11, 6, 6, 21, 21, 6, 21, - 21, 21); - __m128i count; - __m128i level[5]; - int8_t *cc = coeff_contexts; - int row = height; - - assert(!(height % 4)); - - do { - load_levels_4x4x5_sse2(levels, stride, offsets, level); - count = get_coeff_contexts_kernel_sse2(level); - count = _mm_add_epi8(count, pos_to_offset); - _mm_store_si128((__m128i *)cc, count); - pos_to_offset = pos_to_offset_large; - levels += 4 * stride; - cc += 16; - row -= 4; - } while (row); - - coeff_contexts[0] = 0; -} - -static INLINE void get_4_nz_map_contexts_hor(const uint8_t *levels, - const int height, - const ptrdiff_t *const offsets, - int8_t *coeff_contexts) { - const int stride = 4 + TX_PAD_HOR; - const __m128i pos_to_offset = - _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5, - SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10, - SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5, - SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10, - SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5, - SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10, - SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5, - SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10); - __m128i count; - __m128i level[5]; - int row = height; - - assert(!(height % 4)); - - do { - load_levels_4x4x5_sse2(levels, stride, offsets, level); 
- count = get_coeff_contexts_kernel_sse2(level); - count = _mm_add_epi8(count, pos_to_offset); - _mm_store_si128((__m128i *)coeff_contexts, count); - levels += 4 * stride; - coeff_contexts += 16; - row -= 4; - } while (row); -} - -static INLINE void get_4_nz_map_contexts_ver(const uint8_t *levels, - const int height, - const ptrdiff_t *const offsets, - int8_t *coeff_contexts) { - const int stride = 4 + TX_PAD_HOR; - const __m128i pos_to_offset_large = _mm_set1_epi8(SIG_COEF_CONTEXTS_2D + 10); - __m128i pos_to_offset = - _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0, - SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0, - SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5, - SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5, - SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10, - SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10, - SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10, - SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10); - __m128i count; - __m128i level[5]; - int row = height; - - assert(!(height % 4)); - - do { - load_levels_4x4x5_sse2(levels, stride, offsets, level); - count = get_coeff_contexts_kernel_sse2(level); - count = _mm_add_epi8(count, pos_to_offset); - _mm_store_si128((__m128i *)coeff_contexts, count); - pos_to_offset = pos_to_offset_large; - levels += 4 * stride; - coeff_contexts += 16; - row -= 4; - } while (row); -} - -static INLINE void get_8_coeff_contexts_2d(const uint8_t *levels, - const int height, - const ptrdiff_t *const offsets, - int8_t *coeff_contexts) { - const int stride = 8 + TX_PAD_HOR; - int8_t *cc = coeff_contexts; - int row = height; - __m128i count; - __m128i level[5]; - __m128i pos_to_offset[3]; - - assert(!(height % 2)); - - if (height == 8) { - pos_to_offset[0] = - _mm_setr_epi8(0, 1, 6, 6, 21, 21, 21, 21, 1, 6, 6, 21, 21, 21, 21, 21); - pos_to_offset[1] = _mm_setr_epi8(6, 6, 21, 21, 21, 21, 21, 21, 6, 21, 21, - 21, 21, 21, 21, 21); - } else if (height < 8) { - pos_to_offset[0] = 
_mm_setr_epi8(0, 16, 6, 6, 21, 21, 21, 21, 16, 16, 6, 21, - 21, 21, 21, 21); - pos_to_offset[1] = _mm_setr_epi8(16, 16, 21, 21, 21, 21, 21, 21, 16, 16, 21, - 21, 21, 21, 21, 21); - } else { - pos_to_offset[0] = _mm_setr_epi8(0, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 11, 11, 11, 11, 11); - pos_to_offset[1] = _mm_setr_epi8(6, 6, 21, 21, 21, 21, 21, 21, 6, 21, 21, - 21, 21, 21, 21, 21); - } - pos_to_offset[2] = _mm_set1_epi8(21); - - do { - load_levels_8x2x5_sse2(levels, stride, offsets, level); - count = get_coeff_contexts_kernel_sse2(level); - count = _mm_add_epi8(count, pos_to_offset[0]); - _mm_store_si128((__m128i *)cc, count); - pos_to_offset[0] = pos_to_offset[1]; - pos_to_offset[1] = pos_to_offset[2]; - levels += 2 * stride; - cc += 16; - row -= 2; - } while (row); - - coeff_contexts[0] = 0; -} - -static INLINE void get_8_coeff_contexts_hor(const uint8_t *levels, - const int height, - const ptrdiff_t *const offsets, - int8_t *coeff_contexts) { - const int stride = 8 + TX_PAD_HOR; - const __m128i pos_to_offset = - _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5, - SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10, - SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10, - SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10, - SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5, - SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10, - SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10, - SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10); - int row = height; - __m128i count; - __m128i level[5]; - - assert(!(height % 2)); - - do { - load_levels_8x2x5_sse2(levels, stride, offsets, level); - count = get_coeff_contexts_kernel_sse2(level); - count = _mm_add_epi8(count, pos_to_offset); - _mm_store_si128((__m128i *)coeff_contexts, count); - levels += 2 * stride; - coeff_contexts += 16; - row -= 2; - } while (row); -} - -static INLINE void get_8_coeff_contexts_ver(const uint8_t *levels, - const int height, - const ptrdiff_t *const offsets, - 
int8_t *coeff_contexts) { - const int stride = 8 + TX_PAD_HOR; - const __m128i pos_to_offset_large = _mm_set1_epi8(SIG_COEF_CONTEXTS_2D + 10); - __m128i pos_to_offset = - _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0, - SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0, - SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0, - SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 0, - SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5, - SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5, - SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5, - SIG_COEF_CONTEXTS_2D + 5, SIG_COEF_CONTEXTS_2D + 5); - int row = height; - __m128i count; - __m128i level[5]; - - assert(!(height % 2)); - - do { - load_levels_8x2x5_sse2(levels, stride, offsets, level); - count = get_coeff_contexts_kernel_sse2(level); - count = _mm_add_epi8(count, pos_to_offset); - _mm_store_si128((__m128i *)coeff_contexts, count); - pos_to_offset = pos_to_offset_large; - levels += 2 * stride; - coeff_contexts += 16; - row -= 2; - } while (row); -} - -static INLINE void get_16n_coeff_contexts_2d(const uint8_t *levels, - const int real_width, - const int real_height, - const int width, const int height, - const ptrdiff_t *const offsets, - int8_t *coeff_contexts) { - const int stride = width + TX_PAD_HOR; - int8_t *cc = coeff_contexts; - int row = height; - __m128i pos_to_offset[5]; - __m128i pos_to_offset_large[3]; - __m128i count; - __m128i level[5]; - - assert(!(width % 16)); - - pos_to_offset_large[2] = _mm_set1_epi8(21); - if (real_width == real_height) { - pos_to_offset[0] = _mm_setr_epi8(0, 1, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21); - pos_to_offset[1] = _mm_setr_epi8(1, 6, 6, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21); - pos_to_offset[2] = _mm_setr_epi8(6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21); - pos_to_offset[3] = _mm_setr_epi8(6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21); - pos_to_offset[4] = pos_to_offset_large[0] = 
pos_to_offset_large[1] = - pos_to_offset_large[2]; - } else if (real_width > real_height) { - pos_to_offset[0] = _mm_setr_epi8(0, 16, 6, 6, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21); - pos_to_offset[1] = _mm_setr_epi8(16, 16, 6, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21); - pos_to_offset[2] = pos_to_offset[3] = pos_to_offset[4] = _mm_setr_epi8( - 16, 16, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21); - pos_to_offset_large[0] = pos_to_offset_large[1] = pos_to_offset_large[2]; - } else { // real_width < real_height - pos_to_offset[0] = pos_to_offset[1] = _mm_setr_epi8( - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11); - pos_to_offset[2] = _mm_setr_epi8(6, 6, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21); - pos_to_offset[3] = _mm_setr_epi8(6, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21); - pos_to_offset[4] = pos_to_offset_large[2]; - pos_to_offset_large[0] = pos_to_offset_large[1] = _mm_set1_epi8(11); - } - - do { - int w = width; - - do { - load_levels_16x1x5_sse2(levels, stride, offsets, level); - count = get_coeff_contexts_kernel_sse2(level); - count = _mm_add_epi8(count, pos_to_offset[0]); - _mm_store_si128((__m128i *)cc, count); - levels += 16; - cc += 16; - w -= 16; - pos_to_offset[0] = pos_to_offset_large[0]; - } while (w); - - pos_to_offset[0] = pos_to_offset[1]; - pos_to_offset[1] = pos_to_offset[2]; - pos_to_offset[2] = pos_to_offset[3]; - pos_to_offset[3] = pos_to_offset[4]; - pos_to_offset_large[0] = pos_to_offset_large[1]; - pos_to_offset_large[1] = pos_to_offset_large[2]; - levels += TX_PAD_HOR; - } while (--row); - - coeff_contexts[0] = 0; -} - -static INLINE void get_16n_coeff_contexts_hor(const uint8_t *levels, - const int width, const int height, - const ptrdiff_t *const offsets, - int8_t *coeff_contexts) { - const int stride = width + TX_PAD_HOR; - const __m128i pos_to_offset_large = - _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10, - SIG_COEF_CONTEXTS_2D 
+ 10, SIG_COEF_CONTEXTS_2D + 10, - SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10, - SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10, - SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10, - SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10, - SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10, - SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10); - __m128i count; - __m128i level[5]; - int row = height; - - assert(!(width % 16)); - - do { - __m128i pos_to_offset = - _mm_setr_epi8(SIG_COEF_CONTEXTS_2D + 0, SIG_COEF_CONTEXTS_2D + 5, - SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10, - SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10, - SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10, - SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10, - SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10, - SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10, - SIG_COEF_CONTEXTS_2D + 10, SIG_COEF_CONTEXTS_2D + 10); - int w = width; - - do { - load_levels_16x1x5_sse2(levels, stride, offsets, level); - count = get_coeff_contexts_kernel_sse2(level); - count = _mm_add_epi8(count, pos_to_offset); - _mm_store_si128((__m128i *)coeff_contexts, count); - pos_to_offset = pos_to_offset_large; - levels += 16; - coeff_contexts += 16; - w -= 16; - } while (w); - - levels += TX_PAD_HOR; - } while (--row); -} - -static INLINE void get_16n_coeff_contexts_ver(const uint8_t *levels, - const int width, const int height, - const ptrdiff_t *const offsets, - int8_t *coeff_contexts) { - const int stride = width + TX_PAD_HOR; - __m128i pos_to_offset[3]; - __m128i count; - __m128i level[5]; - int row = height; - - assert(!(width % 16)); - - pos_to_offset[0] = _mm_set1_epi8(SIG_COEF_CONTEXTS_2D + 0); - pos_to_offset[1] = _mm_set1_epi8(SIG_COEF_CONTEXTS_2D + 5); - pos_to_offset[2] = _mm_set1_epi8(SIG_COEF_CONTEXTS_2D + 10); - - do { - int w = width; - - do { - load_levels_16x1x5_sse2(levels, stride, offsets, level); - count = get_coeff_contexts_kernel_sse2(level); - count 
= _mm_add_epi8(count, pos_to_offset[0]); - _mm_store_si128((__m128i *)coeff_contexts, count); - levels += 16; - coeff_contexts += 16; - w -= 16; - } while (w); - - pos_to_offset[0] = pos_to_offset[1]; - pos_to_offset[1] = pos_to_offset[2]; - levels += TX_PAD_HOR; - } while (--row); -} - -// Note: levels[] must be in the range [0, 127], inclusive. -void av1_get_nz_map_contexts_sse2(const uint8_t *const levels, - const int16_t *const scan, const uint16_t eob, - const TX_SIZE tx_size, - const TX_CLASS tx_class, - int8_t *const coeff_contexts) { - const int last_idx = eob - 1; - if (!last_idx) { - coeff_contexts[0] = 0; - return; - } - - const int real_width = tx_size_wide[tx_size]; - const int real_height = tx_size_high[tx_size]; - const int width = get_txb_wide(tx_size); - const int height = get_txb_high(tx_size); - const int stride = width + TX_PAD_HOR; - ptrdiff_t offsets[3]; - - /* coeff_contexts must be 16 byte aligned. */ - assert(!((intptr_t)coeff_contexts & 0xf)); - - if (tx_class == TX_CLASS_2D) { - offsets[0] = 0 * stride + 2; - offsets[1] = 1 * stride + 1; - offsets[2] = 2 * stride + 0; - - if (width == 4) { - get_4_nz_map_contexts_2d(levels, height, offsets, coeff_contexts); - } else if (width == 8) { - get_8_coeff_contexts_2d(levels, height, offsets, coeff_contexts); - } else if (width == 16) { - get_16n_coeff_contexts_2d(levels, real_width, real_height, width, height, - offsets, coeff_contexts); - } else { - get_16n_coeff_contexts_2d(levels, real_width, real_height, width, height, - offsets, coeff_contexts); - } - } else if (tx_class == TX_CLASS_HORIZ) { - offsets[0] = 2; - offsets[1] = 3; - offsets[2] = 4; - if (width == 4) { - get_4_nz_map_contexts_hor(levels, height, offsets, coeff_contexts); - } else if (width == 8) { - get_8_coeff_contexts_hor(levels, height, offsets, coeff_contexts); - } else { - get_16n_coeff_contexts_hor(levels, width, height, offsets, - coeff_contexts); - } - } else { // TX_CLASS_VERT - offsets[0] = 2 * stride; - offsets[1] = 3 
* stride; - offsets[2] = 4 * stride; - if (width == 4) { - get_4_nz_map_contexts_ver(levels, height, offsets, coeff_contexts); - } else if (width == 8) { - get_8_coeff_contexts_ver(levels, height, offsets, coeff_contexts); - } else { - get_16n_coeff_contexts_ver(levels, width, height, offsets, - coeff_contexts); - } - } - - const int bwl = get_txb_bwl(tx_size); - const int pos = scan[last_idx]; - if (last_idx <= (height << bwl) / 8) - coeff_contexts[pos] = 1; - else if (last_idx <= (height << bwl) / 4) - coeff_contexts[pos] = 2; - else - coeff_contexts[pos] = 3; -} diff --git a/third_party/aom/av1/encoder/x86/encodetxb_sse4.c b/third_party/aom/av1/encoder/x86/encodetxb_sse4.c deleted file mode 100644 index 5e0687cd3..000000000 --- a/third_party/aom/av1/encoder/x86/encodetxb_sse4.c +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2017, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include -#include // SSE2 -#include /* SSE4.1 */ - -#include "aom/aom_integer.h" -#include "av1/common/onyxc_int.h" -#include "av1/common/txb_common.h" -#include "aom_dsp/x86/synonyms.h" - -void av1_txb_init_levels_sse4_1(const tran_low_t *const coeff, const int width, - const int height, uint8_t *const levels) { - const int stride = width + TX_PAD_HOR; - const __m128i zeros = _mm_setzero_si128(); - - const int32_t pre_len = sizeof(*levels) * TX_PAD_TOP * stride; - uint8_t *pre_buf = levels - TX_PAD_TOP * stride; - uint8_t *pre_buf_end = pre_buf + pre_len; - do { - _mm_storeu_si128((__m128i *)(pre_buf), zeros); - pre_buf += 16; - } while (pre_buf < pre_buf_end); - - const int32_t bottom_len = sizeof(*levels) * (TX_PAD_BOTTOM * stride); - uint8_t *bottom_buf = levels + stride * height; - uint8_t *bottom_buf_end = bottom_buf + bottom_len; - do { - _mm_storeu_si128((__m128i *)(bottom_buf), zeros); - bottom_buf += 16; - } while (bottom_buf < bottom_buf_end); - - int i = 0; - uint8_t *ls = levels; - const tran_low_t *cf = coeff; - if (width == 4) { - do { - const __m128i coeffA = xx_loadu_128(cf); - const __m128i coeffB = xx_loadu_128(cf + 4); - const __m128i coeffAB = _mm_packs_epi32(coeffA, coeffB); - const __m128i absAB = _mm_abs_epi16(coeffAB); - const __m128i absAB8 = _mm_packs_epi16(absAB, zeros); - const __m128i lsAB = _mm_unpacklo_epi32(absAB8, zeros); - xx_storeu_128(ls, lsAB); - ls += (stride << 1); - cf += (width << 1); - i += 2; - } while (i < height); - } else if (width == 8) { - do { - const __m128i coeffA = xx_loadu_128(cf); - const __m128i coeffB = xx_loadu_128(cf + 4); - const __m128i coeffAB = _mm_packs_epi32(coeffA, coeffB); - const __m128i absAB = _mm_abs_epi16(coeffAB); - const __m128i absAB8 = _mm_packs_epi16(absAB, zeros); - xx_storeu_128(ls, absAB8); - ls += stride; - cf += width; - i += 1; - } while (i < height); - } else { - do { - int j = 0; - do { - const __m128i coeffA = xx_loadu_128(cf); - const __m128i coeffB = xx_loadu_128(cf + 
4); - const __m128i coeffC = xx_loadu_128(cf + 8); - const __m128i coeffD = xx_loadu_128(cf + 12); - const __m128i coeffAB = _mm_packs_epi32(coeffA, coeffB); - const __m128i coeffCD = _mm_packs_epi32(coeffC, coeffD); - const __m128i absAB = _mm_abs_epi16(coeffAB); - const __m128i absCD = _mm_abs_epi16(coeffCD); - const __m128i absABCD = _mm_packs_epi16(absAB, absCD); - xx_storeu_128(ls + j, absABCD); - j += 16; - cf += 16; - } while (j < width); - *(int32_t *)(ls + width) = 0; - ls += stride; - i += 1; - } while (i < height); - } -} diff --git a/third_party/aom/av1/encoder/x86/error_intrin_avx2.c b/third_party/aom/av1/encoder/x86/error_intrin_avx2.c deleted file mode 100644 index 7d4f69585..000000000 --- a/third_party/aom/av1/encoder/x86/error_intrin_avx2.c +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include // AVX2 - -#include "config/av1_rtcd.h" - -#include "aom/aom_integer.h" - -static INLINE void read_coeff(const tran_low_t *coeff, intptr_t offset, - __m256i *c) { - const tran_low_t *addr = coeff + offset; - - if (sizeof(tran_low_t) == 4) { - const __m256i x0 = _mm256_loadu_si256((const __m256i *)addr); - const __m256i x1 = _mm256_loadu_si256((const __m256i *)addr + 1); - const __m256i y = _mm256_packs_epi32(x0, x1); - *c = _mm256_permute4x64_epi64(y, 0xD8); - } else { - *c = _mm256_loadu_si256((const __m256i *)addr); - } -} - -int64_t av1_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, - intptr_t block_size, int64_t *ssz) { - __m256i sse_reg, ssz_reg, coeff_reg, dqcoeff_reg; - __m256i exp_dqcoeff_lo, exp_dqcoeff_hi, exp_coeff_lo, exp_coeff_hi; - __m256i sse_reg_64hi, ssz_reg_64hi; - __m128i sse_reg128, ssz_reg128; - int64_t sse; - int i; - const __m256i zero_reg = _mm256_setzero_si256(); - - // init sse and ssz registerd to zero - sse_reg = _mm256_setzero_si256(); - ssz_reg = _mm256_setzero_si256(); - - for (i = 0; i < block_size; i += 16) { - // load 32 bytes from coeff and dqcoeff - read_coeff(coeff, i, &coeff_reg); - read_coeff(dqcoeff, i, &dqcoeff_reg); - // dqcoeff - coeff - dqcoeff_reg = _mm256_sub_epi16(dqcoeff_reg, coeff_reg); - // madd (dqcoeff - coeff) - dqcoeff_reg = _mm256_madd_epi16(dqcoeff_reg, dqcoeff_reg); - // madd coeff - coeff_reg = _mm256_madd_epi16(coeff_reg, coeff_reg); - // expand each double word of madd (dqcoeff - coeff) to quad word - exp_dqcoeff_lo = _mm256_unpacklo_epi32(dqcoeff_reg, zero_reg); - exp_dqcoeff_hi = _mm256_unpackhi_epi32(dqcoeff_reg, zero_reg); - // expand each double word of madd (coeff) to quad word - exp_coeff_lo = _mm256_unpacklo_epi32(coeff_reg, zero_reg); - exp_coeff_hi = _mm256_unpackhi_epi32(coeff_reg, zero_reg); - // add each quad word of madd (dqcoeff - coeff) and madd (coeff) - sse_reg = _mm256_add_epi64(sse_reg, exp_dqcoeff_lo); - ssz_reg = _mm256_add_epi64(ssz_reg, 
exp_coeff_lo); - sse_reg = _mm256_add_epi64(sse_reg, exp_dqcoeff_hi); - ssz_reg = _mm256_add_epi64(ssz_reg, exp_coeff_hi); - } - // save the higher 64 bit of each 128 bit lane - sse_reg_64hi = _mm256_srli_si256(sse_reg, 8); - ssz_reg_64hi = _mm256_srli_si256(ssz_reg, 8); - // add the higher 64 bit to the low 64 bit - sse_reg = _mm256_add_epi64(sse_reg, sse_reg_64hi); - ssz_reg = _mm256_add_epi64(ssz_reg, ssz_reg_64hi); - - // add each 64 bit from each of the 128 bit lane of the 256 bit - sse_reg128 = _mm_add_epi64(_mm256_castsi256_si128(sse_reg), - _mm256_extractf128_si256(sse_reg, 1)); - - ssz_reg128 = _mm_add_epi64(_mm256_castsi256_si128(ssz_reg), - _mm256_extractf128_si256(ssz_reg, 1)); - - // store the results - _mm_storel_epi64((__m128i *)(&sse), sse_reg128); - - _mm_storel_epi64((__m128i *)(ssz), ssz_reg128); - _mm256_zeroupper(); - return sse; -} diff --git a/third_party/aom/av1/encoder/x86/error_sse2.asm b/third_party/aom/av1/encoder/x86/error_sse2.asm deleted file mode 100644 index 72e9e22b1..000000000 --- a/third_party/aom/av1/encoder/x86/error_sse2.asm +++ /dev/null @@ -1,79 +0,0 @@ -; -; Copyright (c) 2016, Alliance for Open Media. All rights reserved -; -; This source code is subject to the terms of the BSD 2 Clause License and -; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License -; was not distributed with this source code in the LICENSE file, you can -; obtain it at www.aomedia.org/license/software. If the Alliance for Open -; Media Patent License 1.0 was not distributed with this source code in the -; PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
-; - -; - -%define private_prefix av1 - -%include "third_party/x86inc/x86inc.asm" - -SECTION .text - -; int64_t av1_block_error(int16_t *coeff, int16_t *dqcoeff, intptr_t block_size, -; int64_t *ssz) - -INIT_XMM sse2 -cglobal block_error, 3, 3, 8, uqc, dqc, size, ssz - pxor m4, m4 ; sse accumulator - pxor m6, m6 ; ssz accumulator - pxor m5, m5 ; dedicated zero register - lea uqcq, [uqcq+sizeq*2] - lea dqcq, [dqcq+sizeq*2] - neg sizeq -.loop: - mova m2, [uqcq+sizeq*2] - mova m0, [dqcq+sizeq*2] - mova m3, [uqcq+sizeq*2+mmsize] - mova m1, [dqcq+sizeq*2+mmsize] - psubw m0, m2 - psubw m1, m3 - ; individual errors are max. 15bit+sign, so squares are 30bit, and - ; thus the sum of 2 should fit in a 31bit integer (+ unused sign bit) - pmaddwd m0, m0 - pmaddwd m1, m1 - pmaddwd m2, m2 - pmaddwd m3, m3 - ; accumulate in 64bit - punpckldq m7, m0, m5 - punpckhdq m0, m5 - paddq m4, m7 - punpckldq m7, m1, m5 - paddq m4, m0 - punpckhdq m1, m5 - paddq m4, m7 - punpckldq m7, m2, m5 - paddq m4, m1 - punpckhdq m2, m5 - paddq m6, m7 - punpckldq m7, m3, m5 - paddq m6, m2 - punpckhdq m3, m5 - paddq m6, m7 - paddq m6, m3 - add sizeq, mmsize - jl .loop - - ; accumulate horizontally and store in return value - movhlps m5, m4 - movhlps m7, m6 - paddq m4, m5 - paddq m6, m7 -%if ARCH_X86_64 - movq rax, m4 - movq [sszq], m6 -%else - mov eax, sszm - pshufd m5, m4, 0x1 - movq [eax], m6 - movd eax, m4 - movd edx, m5 -%endif - RET diff --git a/third_party/aom/av1/encoder/x86/hash_sse42.c b/third_party/aom/av1/encoder/x86/hash_sse42.c deleted file mode 100644 index 65fa46311..000000000 --- a/third_party/aom/av1/encoder/x86/hash_sse42.c +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include -#include - -// Byte-boundary alignment issues -#define ALIGN_SIZE 8 -#define ALIGN_MASK (ALIGN_SIZE - 1) - -#define CALC_CRC(op, crc, type, buf, len) \ - while ((len) >= sizeof(type)) { \ - (crc) = op((crc), *(type *)(buf)); \ - (len) -= sizeof(type); \ - buf += sizeof(type); \ - } - -/** - * Calculates 32-bit CRC for the input buffer - * polynomial is 0x11EDC6F41 - * @return A 32-bit unsigned integer representing the CRC - */ -uint32_t av1_get_crc32c_value_sse4_2(void *crc_calculator, uint8_t *p, - size_t len) { - (void)crc_calculator; - const uint8_t *buf = p; - uint32_t crc = 0xFFFFFFFF; - - // Align the input to the word boundary - for (; (len > 0) && ((intptr_t)buf & ALIGN_MASK); len--, buf++) { - crc = _mm_crc32_u8(crc, *buf); - } - -#ifdef __x86_64__ - uint64_t crc64 = crc; - CALC_CRC(_mm_crc32_u64, crc64, uint64_t, buf, len); - crc = (uint32_t)crc64; -#endif - CALC_CRC(_mm_crc32_u32, crc, uint32_t, buf, len); - CALC_CRC(_mm_crc32_u16, crc, uint16_t, buf, len); - CALC_CRC(_mm_crc32_u8, crc, uint8_t, buf, len); - return (crc ^= 0xFFFFFFFF); -} diff --git a/third_party/aom/av1/encoder/x86/highbd_block_error_intrin_sse2.c b/third_party/aom/av1/encoder/x86/highbd_block_error_intrin_sse2.c deleted file mode 100644 index 777304ace..000000000 --- a/third_party/aom/av1/encoder/x86/highbd_block_error_intrin_sse2.c +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include -#include - -#include "av1/common/common.h" - -int64_t av1_highbd_block_error_sse2(tran_low_t *coeff, tran_low_t *dqcoeff, - intptr_t block_size, int64_t *ssz, - int bps) { - int i, j, test; - uint32_t temp[4]; - __m128i max, min, cmp0, cmp1, cmp2, cmp3; - int64_t error = 0, sqcoeff = 0; - const int shift = 2 * (bps - 8); - const int rounding = shift > 0 ? 1 << (shift - 1) : 0; - - for (i = 0; i < block_size; i += 8) { - // Load the data into xmm registers - __m128i mm_coeff = _mm_load_si128((__m128i *)(coeff + i)); - __m128i mm_coeff2 = _mm_load_si128((__m128i *)(coeff + i + 4)); - __m128i mm_dqcoeff = _mm_load_si128((__m128i *)(dqcoeff + i)); - __m128i mm_dqcoeff2 = _mm_load_si128((__m128i *)(dqcoeff + i + 4)); - // Check if any values require more than 15 bit - max = _mm_set1_epi32(0x3fff); - min = _mm_set1_epi32(0xffffc000); - cmp0 = _mm_xor_si128(_mm_cmpgt_epi32(mm_coeff, max), - _mm_cmplt_epi32(mm_coeff, min)); - cmp1 = _mm_xor_si128(_mm_cmpgt_epi32(mm_coeff2, max), - _mm_cmplt_epi32(mm_coeff2, min)); - cmp2 = _mm_xor_si128(_mm_cmpgt_epi32(mm_dqcoeff, max), - _mm_cmplt_epi32(mm_dqcoeff, min)); - cmp3 = _mm_xor_si128(_mm_cmpgt_epi32(mm_dqcoeff2, max), - _mm_cmplt_epi32(mm_dqcoeff2, min)); - test = _mm_movemask_epi8( - _mm_or_si128(_mm_or_si128(cmp0, cmp1), _mm_or_si128(cmp2, cmp3))); - - if (!test) { - __m128i mm_diff, error_sse2, sqcoeff_sse2; - mm_coeff = _mm_packs_epi32(mm_coeff, mm_coeff2); - mm_dqcoeff = _mm_packs_epi32(mm_dqcoeff, mm_dqcoeff2); - mm_diff = _mm_sub_epi16(mm_coeff, mm_dqcoeff); - error_sse2 = _mm_madd_epi16(mm_diff, mm_diff); - sqcoeff_sse2 = _mm_madd_epi16(mm_coeff, mm_coeff); - 
_mm_storeu_si128((__m128i *)temp, error_sse2); - error = error + temp[0] + temp[1] + temp[2] + temp[3]; - _mm_storeu_si128((__m128i *)temp, sqcoeff_sse2); - sqcoeff += temp[0] + temp[1] + temp[2] + temp[3]; - } else { - for (j = 0; j < 8; j++) { - const int64_t diff = coeff[i + j] - dqcoeff[i + j]; - error += diff * diff; - sqcoeff += (int64_t)coeff[i + j] * (int64_t)coeff[i + j]; - } - } - } - assert(error >= 0 && sqcoeff >= 0); - error = (error + rounding) >> shift; - sqcoeff = (sqcoeff + rounding) >> shift; - - *ssz = sqcoeff; - return error; -} diff --git a/third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c b/third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c deleted file mode 100644 index 535485ae8..000000000 --- a/third_party/aom/av1/encoder/x86/highbd_fwd_txfm_sse4.c +++ /dev/null @@ -1,1783 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ -#include -#include /* SSE4.1 */ - -#include "config/aom_config.h" -#include "config/av1_rtcd.h" - -#include "av1/common/av1_txfm.h" -#include "av1/common/x86/highbd_txfm_utility_sse4.h" -#include "av1/encoder/av1_fwd_txfm1d_cfg.h" -#include "av1/encoder/x86/av1_txfm1d_sse4.h" -#include "aom_dsp/txfm_common.h" -#include "aom_dsp/x86/txfm_common_sse2.h" -#include "aom_ports/mem.h" - -static INLINE void load_buffer_4x4(const int16_t *input, __m128i *in, - int stride, int flipud, int fliplr, - int shift) { - if (!flipud) { - in[0] = _mm_loadl_epi64((const __m128i *)(input + 0 * stride)); - in[1] = _mm_loadl_epi64((const __m128i *)(input + 1 * stride)); - in[2] = _mm_loadl_epi64((const __m128i *)(input + 2 * stride)); - in[3] = _mm_loadl_epi64((const __m128i *)(input + 3 * stride)); - } else { - in[0] = _mm_loadl_epi64((const __m128i *)(input + 3 * stride)); - in[1] = _mm_loadl_epi64((const __m128i *)(input + 2 * stride)); - in[2] = _mm_loadl_epi64((const __m128i *)(input + 1 * stride)); - in[3] = _mm_loadl_epi64((const __m128i *)(input + 0 * stride)); - } - - if (fliplr) { - in[0] = _mm_shufflelo_epi16(in[0], 0x1b); - in[1] = _mm_shufflelo_epi16(in[1], 0x1b); - in[2] = _mm_shufflelo_epi16(in[2], 0x1b); - in[3] = _mm_shufflelo_epi16(in[3], 0x1b); - } - - in[0] = _mm_cvtepi16_epi32(in[0]); - in[1] = _mm_cvtepi16_epi32(in[1]); - in[2] = _mm_cvtepi16_epi32(in[2]); - in[3] = _mm_cvtepi16_epi32(in[3]); - - in[0] = _mm_slli_epi32(in[0], shift); - in[1] = _mm_slli_epi32(in[1], shift); - in[2] = _mm_slli_epi32(in[2], shift); - in[3] = _mm_slli_epi32(in[3], shift); -} - -// We only use stage-2 bit; -// shift[0] is used in load_buffer_4x4() -// shift[1] is used in txfm_func_col() -// shift[2] is used in txfm_func_row() -static void fdct4x4_sse4_1(__m128i *in, int bit) { - const int32_t *cospi = cospi_arr(bit); - const __m128i cospi32 = _mm_set1_epi32(cospi[32]); - const __m128i cospi48 = _mm_set1_epi32(cospi[48]); - const __m128i cospi16 = _mm_set1_epi32(cospi[16]); - const 
__m128i rnding = _mm_set1_epi32(1 << (bit - 1)); - __m128i s0, s1, s2, s3; - __m128i u0, u1, u2, u3; - __m128i v0, v1, v2, v3; - - s0 = _mm_add_epi32(in[0], in[3]); - s1 = _mm_add_epi32(in[1], in[2]); - s2 = _mm_sub_epi32(in[1], in[2]); - s3 = _mm_sub_epi32(in[0], in[3]); - - // btf_32_sse4_1_type0(cospi32, cospi32, s[01], u[02], bit); - u0 = _mm_mullo_epi32(s0, cospi32); - u1 = _mm_mullo_epi32(s1, cospi32); - u2 = _mm_add_epi32(u0, u1); - v0 = _mm_sub_epi32(u0, u1); - - u3 = _mm_add_epi32(u2, rnding); - v1 = _mm_add_epi32(v0, rnding); - - u0 = _mm_srai_epi32(u3, bit); - u2 = _mm_srai_epi32(v1, bit); - - // btf_32_sse4_1_type1(cospi48, cospi16, s[23], u[13], bit); - v0 = _mm_mullo_epi32(s2, cospi48); - v1 = _mm_mullo_epi32(s3, cospi16); - v2 = _mm_add_epi32(v0, v1); - - v3 = _mm_add_epi32(v2, rnding); - u1 = _mm_srai_epi32(v3, bit); - - v0 = _mm_mullo_epi32(s2, cospi16); - v1 = _mm_mullo_epi32(s3, cospi48); - v2 = _mm_sub_epi32(v1, v0); - - v3 = _mm_add_epi32(v2, rnding); - u3 = _mm_srai_epi32(v3, bit); - - // Note: shift[1] and shift[2] are zeros - - // Transpose 4x4 32-bit - v0 = _mm_unpacklo_epi32(u0, u1); - v1 = _mm_unpackhi_epi32(u0, u1); - v2 = _mm_unpacklo_epi32(u2, u3); - v3 = _mm_unpackhi_epi32(u2, u3); - - in[0] = _mm_unpacklo_epi64(v0, v2); - in[1] = _mm_unpackhi_epi64(v0, v2); - in[2] = _mm_unpacklo_epi64(v1, v3); - in[3] = _mm_unpackhi_epi64(v1, v3); -} - -static INLINE void write_buffer_4x4(__m128i *res, int32_t *output) { - _mm_store_si128((__m128i *)(output + 0 * 4), res[0]); - _mm_store_si128((__m128i *)(output + 1 * 4), res[1]); - _mm_store_si128((__m128i *)(output + 2 * 4), res[2]); - _mm_store_si128((__m128i *)(output + 3 * 4), res[3]); -} - -static void fadst4x4_sse4_1(__m128i *in, int bit) { - const int32_t *sinpi = sinpi_arr(bit); - const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); - const __m128i sinpi1 = _mm_set1_epi32((int)sinpi[1]); - const __m128i sinpi2 = _mm_set1_epi32((int)sinpi[2]); - const __m128i sinpi3 = 
_mm_set1_epi32((int)sinpi[3]); - const __m128i sinpi4 = _mm_set1_epi32((int)sinpi[4]); - __m128i t; - __m128i s0, s1, s2, s3, s4, s5, s6, s7; - __m128i x0, x1, x2, x3; - __m128i u0, u1, u2, u3; - __m128i v0, v1, v2, v3; - - s0 = _mm_mullo_epi32(in[0], sinpi1); - s1 = _mm_mullo_epi32(in[0], sinpi4); - s2 = _mm_mullo_epi32(in[1], sinpi2); - s3 = _mm_mullo_epi32(in[1], sinpi1); - s4 = _mm_mullo_epi32(in[2], sinpi3); - s5 = _mm_mullo_epi32(in[3], sinpi4); - s6 = _mm_mullo_epi32(in[3], sinpi2); - t = _mm_add_epi32(in[0], in[1]); - s7 = _mm_sub_epi32(t, in[3]); - - t = _mm_add_epi32(s0, s2); - x0 = _mm_add_epi32(t, s5); - x1 = _mm_mullo_epi32(s7, sinpi3); - t = _mm_sub_epi32(s1, s3); - x2 = _mm_add_epi32(t, s6); - x3 = s4; - - s0 = _mm_add_epi32(x0, x3); - s1 = x1; - s2 = _mm_sub_epi32(x2, x3); - t = _mm_sub_epi32(x2, x0); - s3 = _mm_add_epi32(t, x3); - - u0 = _mm_add_epi32(s0, rnding); - u0 = _mm_srai_epi32(u0, bit); - - u1 = _mm_add_epi32(s1, rnding); - u1 = _mm_srai_epi32(u1, bit); - - u2 = _mm_add_epi32(s2, rnding); - u2 = _mm_srai_epi32(u2, bit); - - u3 = _mm_add_epi32(s3, rnding); - u3 = _mm_srai_epi32(u3, bit); - - v0 = _mm_unpacklo_epi32(u0, u1); - v1 = _mm_unpackhi_epi32(u0, u1); - v2 = _mm_unpacklo_epi32(u2, u3); - v3 = _mm_unpackhi_epi32(u2, u3); - - in[0] = _mm_unpacklo_epi64(v0, v2); - in[1] = _mm_unpackhi_epi64(v0, v2); - in[2] = _mm_unpacklo_epi64(v1, v3); - in[3] = _mm_unpackhi_epi64(v1, v3); -} - -void av1_fwd_txfm2d_4x4_sse4_1(const int16_t *input, int32_t *coeff, - int input_stride, TX_TYPE tx_type, int bd) { - __m128i in[4]; - const int8_t *shift = fwd_txfm_shift_ls[TX_4X4]; - const int txw_idx = get_txw_idx(TX_4X4); - const int txh_idx = get_txh_idx(TX_4X4); - - switch (tx_type) { - case DCT_DCT: - load_buffer_4x4(input, in, input_stride, 0, 0, shift[0]); - fdct4x4_sse4_1(in, fwd_cos_bit_col[txw_idx][txh_idx]); - fdct4x4_sse4_1(in, fwd_cos_bit_row[txw_idx][txh_idx]); - write_buffer_4x4(in, coeff); - break; - case ADST_DCT: - load_buffer_4x4(input, 
in, input_stride, 0, 0, shift[0]); - fadst4x4_sse4_1(in, fwd_cos_bit_col[txw_idx][txh_idx]); - fdct4x4_sse4_1(in, fwd_cos_bit_row[txw_idx][txh_idx]); - write_buffer_4x4(in, coeff); - break; - case DCT_ADST: - load_buffer_4x4(input, in, input_stride, 0, 0, shift[0]); - fdct4x4_sse4_1(in, fwd_cos_bit_col[txw_idx][txh_idx]); - fadst4x4_sse4_1(in, fwd_cos_bit_row[txw_idx][txh_idx]); - write_buffer_4x4(in, coeff); - break; - case ADST_ADST: - load_buffer_4x4(input, in, input_stride, 0, 0, shift[0]); - fadst4x4_sse4_1(in, fwd_cos_bit_col[txw_idx][txh_idx]); - fadst4x4_sse4_1(in, fwd_cos_bit_row[txw_idx][txh_idx]); - write_buffer_4x4(in, coeff); - break; - case FLIPADST_DCT: - load_buffer_4x4(input, in, input_stride, 1, 0, shift[0]); - fadst4x4_sse4_1(in, fwd_cos_bit_col[txw_idx][txh_idx]); - fdct4x4_sse4_1(in, fwd_cos_bit_row[txw_idx][txh_idx]); - write_buffer_4x4(in, coeff); - break; - case DCT_FLIPADST: - load_buffer_4x4(input, in, input_stride, 0, 1, shift[0]); - fdct4x4_sse4_1(in, fwd_cos_bit_col[txw_idx][txh_idx]); - fadst4x4_sse4_1(in, fwd_cos_bit_row[txw_idx][txh_idx]); - write_buffer_4x4(in, coeff); - break; - case FLIPADST_FLIPADST: - load_buffer_4x4(input, in, input_stride, 1, 1, shift[0]); - fadst4x4_sse4_1(in, fwd_cos_bit_col[txw_idx][txh_idx]); - fadst4x4_sse4_1(in, fwd_cos_bit_row[txw_idx][txh_idx]); - write_buffer_4x4(in, coeff); - break; - case ADST_FLIPADST: - load_buffer_4x4(input, in, input_stride, 0, 1, shift[0]); - fadst4x4_sse4_1(in, fwd_cos_bit_col[txw_idx][txh_idx]); - fadst4x4_sse4_1(in, fwd_cos_bit_row[txw_idx][txh_idx]); - write_buffer_4x4(in, coeff); - break; - case FLIPADST_ADST: - load_buffer_4x4(input, in, input_stride, 1, 0, shift[0]); - fadst4x4_sse4_1(in, fwd_cos_bit_col[txw_idx][txh_idx]); - fadst4x4_sse4_1(in, fwd_cos_bit_row[txw_idx][txh_idx]); - write_buffer_4x4(in, coeff); - break; - default: assert(0); - } - (void)bd; -} - -static INLINE void load_buffer_8x8(const int16_t *input, __m128i *in, - int stride, int flipud, int fliplr, - 
int shift) { - __m128i u; - if (!flipud) { - in[0] = _mm_load_si128((const __m128i *)(input + 0 * stride)); - in[1] = _mm_load_si128((const __m128i *)(input + 1 * stride)); - in[2] = _mm_load_si128((const __m128i *)(input + 2 * stride)); - in[3] = _mm_load_si128((const __m128i *)(input + 3 * stride)); - in[4] = _mm_load_si128((const __m128i *)(input + 4 * stride)); - in[5] = _mm_load_si128((const __m128i *)(input + 5 * stride)); - in[6] = _mm_load_si128((const __m128i *)(input + 6 * stride)); - in[7] = _mm_load_si128((const __m128i *)(input + 7 * stride)); - } else { - in[0] = _mm_load_si128((const __m128i *)(input + 7 * stride)); - in[1] = _mm_load_si128((const __m128i *)(input + 6 * stride)); - in[2] = _mm_load_si128((const __m128i *)(input + 5 * stride)); - in[3] = _mm_load_si128((const __m128i *)(input + 4 * stride)); - in[4] = _mm_load_si128((const __m128i *)(input + 3 * stride)); - in[5] = _mm_load_si128((const __m128i *)(input + 2 * stride)); - in[6] = _mm_load_si128((const __m128i *)(input + 1 * stride)); - in[7] = _mm_load_si128((const __m128i *)(input + 0 * stride)); - } - - if (fliplr) { - in[0] = mm_reverse_epi16(in[0]); - in[1] = mm_reverse_epi16(in[1]); - in[2] = mm_reverse_epi16(in[2]); - in[3] = mm_reverse_epi16(in[3]); - in[4] = mm_reverse_epi16(in[4]); - in[5] = mm_reverse_epi16(in[5]); - in[6] = mm_reverse_epi16(in[6]); - in[7] = mm_reverse_epi16(in[7]); - } - - u = _mm_unpackhi_epi64(in[4], in[4]); - in[8] = _mm_cvtepi16_epi32(in[4]); - in[9] = _mm_cvtepi16_epi32(u); - - u = _mm_unpackhi_epi64(in[5], in[5]); - in[10] = _mm_cvtepi16_epi32(in[5]); - in[11] = _mm_cvtepi16_epi32(u); - - u = _mm_unpackhi_epi64(in[6], in[6]); - in[12] = _mm_cvtepi16_epi32(in[6]); - in[13] = _mm_cvtepi16_epi32(u); - - u = _mm_unpackhi_epi64(in[7], in[7]); - in[14] = _mm_cvtepi16_epi32(in[7]); - in[15] = _mm_cvtepi16_epi32(u); - - u = _mm_unpackhi_epi64(in[3], in[3]); - in[6] = _mm_cvtepi16_epi32(in[3]); - in[7] = _mm_cvtepi16_epi32(u); - - u = _mm_unpackhi_epi64(in[2], 
in[2]); - in[4] = _mm_cvtepi16_epi32(in[2]); - in[5] = _mm_cvtepi16_epi32(u); - - u = _mm_unpackhi_epi64(in[1], in[1]); - in[2] = _mm_cvtepi16_epi32(in[1]); - in[3] = _mm_cvtepi16_epi32(u); - - u = _mm_unpackhi_epi64(in[0], in[0]); - in[0] = _mm_cvtepi16_epi32(in[0]); - in[1] = _mm_cvtepi16_epi32(u); - - in[0] = _mm_slli_epi32(in[0], shift); - in[1] = _mm_slli_epi32(in[1], shift); - in[2] = _mm_slli_epi32(in[2], shift); - in[3] = _mm_slli_epi32(in[3], shift); - in[4] = _mm_slli_epi32(in[4], shift); - in[5] = _mm_slli_epi32(in[5], shift); - in[6] = _mm_slli_epi32(in[6], shift); - in[7] = _mm_slli_epi32(in[7], shift); - - in[8] = _mm_slli_epi32(in[8], shift); - in[9] = _mm_slli_epi32(in[9], shift); - in[10] = _mm_slli_epi32(in[10], shift); - in[11] = _mm_slli_epi32(in[11], shift); - in[12] = _mm_slli_epi32(in[12], shift); - in[13] = _mm_slli_epi32(in[13], shift); - in[14] = _mm_slli_epi32(in[14], shift); - in[15] = _mm_slli_epi32(in[15], shift); -} - -static INLINE void col_txfm_8x8_rounding(__m128i *in, int shift) { - const __m128i rounding = _mm_set1_epi32(1 << (shift - 1)); - - in[0] = _mm_add_epi32(in[0], rounding); - in[1] = _mm_add_epi32(in[1], rounding); - in[2] = _mm_add_epi32(in[2], rounding); - in[3] = _mm_add_epi32(in[3], rounding); - in[4] = _mm_add_epi32(in[4], rounding); - in[5] = _mm_add_epi32(in[5], rounding); - in[6] = _mm_add_epi32(in[6], rounding); - in[7] = _mm_add_epi32(in[7], rounding); - in[8] = _mm_add_epi32(in[8], rounding); - in[9] = _mm_add_epi32(in[9], rounding); - in[10] = _mm_add_epi32(in[10], rounding); - in[11] = _mm_add_epi32(in[11], rounding); - in[12] = _mm_add_epi32(in[12], rounding); - in[13] = _mm_add_epi32(in[13], rounding); - in[14] = _mm_add_epi32(in[14], rounding); - in[15] = _mm_add_epi32(in[15], rounding); - - in[0] = _mm_srai_epi32(in[0], shift); - in[1] = _mm_srai_epi32(in[1], shift); - in[2] = _mm_srai_epi32(in[2], shift); - in[3] = _mm_srai_epi32(in[3], shift); - in[4] = _mm_srai_epi32(in[4], shift); - in[5] = 
_mm_srai_epi32(in[5], shift); - in[6] = _mm_srai_epi32(in[6], shift); - in[7] = _mm_srai_epi32(in[7], shift); - in[8] = _mm_srai_epi32(in[8], shift); - in[9] = _mm_srai_epi32(in[9], shift); - in[10] = _mm_srai_epi32(in[10], shift); - in[11] = _mm_srai_epi32(in[11], shift); - in[12] = _mm_srai_epi32(in[12], shift); - in[13] = _mm_srai_epi32(in[13], shift); - in[14] = _mm_srai_epi32(in[14], shift); - in[15] = _mm_srai_epi32(in[15], shift); -} - -static INLINE void write_buffer_8x8(const __m128i *res, int32_t *output) { - _mm_store_si128((__m128i *)(output + 0 * 4), res[0]); - _mm_store_si128((__m128i *)(output + 1 * 4), res[1]); - _mm_store_si128((__m128i *)(output + 2 * 4), res[2]); - _mm_store_si128((__m128i *)(output + 3 * 4), res[3]); - - _mm_store_si128((__m128i *)(output + 4 * 4), res[4]); - _mm_store_si128((__m128i *)(output + 5 * 4), res[5]); - _mm_store_si128((__m128i *)(output + 6 * 4), res[6]); - _mm_store_si128((__m128i *)(output + 7 * 4), res[7]); - - _mm_store_si128((__m128i *)(output + 8 * 4), res[8]); - _mm_store_si128((__m128i *)(output + 9 * 4), res[9]); - _mm_store_si128((__m128i *)(output + 10 * 4), res[10]); - _mm_store_si128((__m128i *)(output + 11 * 4), res[11]); - - _mm_store_si128((__m128i *)(output + 12 * 4), res[12]); - _mm_store_si128((__m128i *)(output + 13 * 4), res[13]); - _mm_store_si128((__m128i *)(output + 14 * 4), res[14]); - _mm_store_si128((__m128i *)(output + 15 * 4), res[15]); -} - -static INLINE void write_buffer_16x8(const __m128i *res, int32_t *output, - const int stride) { - _mm_storeu_si128((__m128i *)(output), res[0]); - _mm_storeu_si128((__m128i *)(output + 4), res[1]); - _mm_storeu_si128((__m128i *)(output + stride), res[2]); - _mm_storeu_si128((__m128i *)(output + stride + 4), res[3]); - - _mm_storeu_si128((__m128i *)(output + (stride * 2)), res[4]); - _mm_storeu_si128((__m128i *)(output + (stride * 2) + 4), res[5]); - _mm_storeu_si128((__m128i *)(output + (stride * 3)), res[6]); - _mm_storeu_si128((__m128i *)(output + 
(stride * 3) + 4), res[7]); - - _mm_storeu_si128((__m128i *)(output + (stride * 4)), res[8]); - _mm_storeu_si128((__m128i *)(output + (stride * 4) + 4), res[9]); - _mm_storeu_si128((__m128i *)(output + (stride * 5)), res[10]); - _mm_storeu_si128((__m128i *)(output + (stride * 5) + 4), res[11]); - - _mm_storeu_si128((__m128i *)(output + (stride * 6)), res[12]); - _mm_storeu_si128((__m128i *)(output + (stride * 6) + 4), res[13]); - _mm_storeu_si128((__m128i *)(output + (stride * 7)), res[14]); - _mm_storeu_si128((__m128i *)(output + (stride * 7) + 4), res[15]); -} - -static void fdct8x8_sse4_1(__m128i *in, __m128i *out, int bit, - const int col_num) { - (void)(col_num); - const int32_t *cospi = cospi_arr(bit); - const __m128i cospi32 = _mm_set1_epi32(cospi[32]); - const __m128i cospim32 = _mm_set1_epi32(-cospi[32]); - const __m128i cospi48 = _mm_set1_epi32(cospi[48]); - const __m128i cospi16 = _mm_set1_epi32(cospi[16]); - const __m128i cospi56 = _mm_set1_epi32(cospi[56]); - const __m128i cospi8 = _mm_set1_epi32(cospi[8]); - const __m128i cospi24 = _mm_set1_epi32(cospi[24]); - const __m128i cospi40 = _mm_set1_epi32(cospi[40]); - const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); - __m128i u[8], v[8]; - - // Even 8 points 0, 2, ..., 14 - // stage 0 - // stage 1 - u[0] = _mm_add_epi32(in[0], in[14]); - v[7] = _mm_sub_epi32(in[0], in[14]); // v[7] - u[1] = _mm_add_epi32(in[2], in[12]); - u[6] = _mm_sub_epi32(in[2], in[12]); - u[2] = _mm_add_epi32(in[4], in[10]); - u[5] = _mm_sub_epi32(in[4], in[10]); - u[3] = _mm_add_epi32(in[6], in[8]); - v[4] = _mm_sub_epi32(in[6], in[8]); // v[4] - - // stage 2 - v[0] = _mm_add_epi32(u[0], u[3]); - v[3] = _mm_sub_epi32(u[0], u[3]); - v[1] = _mm_add_epi32(u[1], u[2]); - v[2] = _mm_sub_epi32(u[1], u[2]); - - v[5] = _mm_mullo_epi32(u[5], cospim32); - v[6] = _mm_mullo_epi32(u[6], cospi32); - v[5] = _mm_add_epi32(v[5], v[6]); - v[5] = _mm_add_epi32(v[5], rnding); - v[5] = _mm_srai_epi32(v[5], bit); - - u[0] = _mm_mullo_epi32(u[5], 
cospi32); - v[6] = _mm_mullo_epi32(u[6], cospim32); - v[6] = _mm_sub_epi32(u[0], v[6]); - v[6] = _mm_add_epi32(v[6], rnding); - v[6] = _mm_srai_epi32(v[6], bit); - - // stage 3 - // type 0 - v[0] = _mm_mullo_epi32(v[0], cospi32); - v[1] = _mm_mullo_epi32(v[1], cospi32); - u[0] = _mm_add_epi32(v[0], v[1]); - u[0] = _mm_add_epi32(u[0], rnding); - u[0] = _mm_srai_epi32(u[0], bit); - - u[1] = _mm_sub_epi32(v[0], v[1]); - u[1] = _mm_add_epi32(u[1], rnding); - u[1] = _mm_srai_epi32(u[1], bit); - - // type 1 - v[0] = _mm_mullo_epi32(v[2], cospi48); - v[1] = _mm_mullo_epi32(v[3], cospi16); - u[2] = _mm_add_epi32(v[0], v[1]); - u[2] = _mm_add_epi32(u[2], rnding); - u[2] = _mm_srai_epi32(u[2], bit); - - v[0] = _mm_mullo_epi32(v[2], cospi16); - v[1] = _mm_mullo_epi32(v[3], cospi48); - u[3] = _mm_sub_epi32(v[1], v[0]); - u[3] = _mm_add_epi32(u[3], rnding); - u[3] = _mm_srai_epi32(u[3], bit); - - u[4] = _mm_add_epi32(v[4], v[5]); - u[5] = _mm_sub_epi32(v[4], v[5]); - u[6] = _mm_sub_epi32(v[7], v[6]); - u[7] = _mm_add_epi32(v[7], v[6]); - - // stage 4 - // stage 5 - v[0] = _mm_mullo_epi32(u[4], cospi56); - v[1] = _mm_mullo_epi32(u[7], cospi8); - v[0] = _mm_add_epi32(v[0], v[1]); - v[0] = _mm_add_epi32(v[0], rnding); - out[2] = _mm_srai_epi32(v[0], bit); // buf0[4] - - v[0] = _mm_mullo_epi32(u[4], cospi8); - v[1] = _mm_mullo_epi32(u[7], cospi56); - v[0] = _mm_sub_epi32(v[1], v[0]); - v[0] = _mm_add_epi32(v[0], rnding); - out[14] = _mm_srai_epi32(v[0], bit); // buf0[7] - - v[0] = _mm_mullo_epi32(u[5], cospi24); - v[1] = _mm_mullo_epi32(u[6], cospi40); - v[0] = _mm_add_epi32(v[0], v[1]); - v[0] = _mm_add_epi32(v[0], rnding); - out[10] = _mm_srai_epi32(v[0], bit); // buf0[5] - - v[0] = _mm_mullo_epi32(u[5], cospi40); - v[1] = _mm_mullo_epi32(u[6], cospi24); - v[0] = _mm_sub_epi32(v[1], v[0]); - v[0] = _mm_add_epi32(v[0], rnding); - out[6] = _mm_srai_epi32(v[0], bit); // buf0[6] - - out[0] = u[0]; // buf0[0] - out[8] = u[1]; // buf0[1] - out[4] = u[2]; // buf0[2] - out[12] = u[3]; // 
buf0[3] - - // Odd 8 points: 1, 3, ..., 15 - // stage 0 - // stage 1 - u[0] = _mm_add_epi32(in[1], in[15]); - v[7] = _mm_sub_epi32(in[1], in[15]); // v[7] - u[1] = _mm_add_epi32(in[3], in[13]); - u[6] = _mm_sub_epi32(in[3], in[13]); - u[2] = _mm_add_epi32(in[5], in[11]); - u[5] = _mm_sub_epi32(in[5], in[11]); - u[3] = _mm_add_epi32(in[7], in[9]); - v[4] = _mm_sub_epi32(in[7], in[9]); // v[4] - - // stage 2 - v[0] = _mm_add_epi32(u[0], u[3]); - v[3] = _mm_sub_epi32(u[0], u[3]); - v[1] = _mm_add_epi32(u[1], u[2]); - v[2] = _mm_sub_epi32(u[1], u[2]); - - v[5] = _mm_mullo_epi32(u[5], cospim32); - v[6] = _mm_mullo_epi32(u[6], cospi32); - v[5] = _mm_add_epi32(v[5], v[6]); - v[5] = _mm_add_epi32(v[5], rnding); - v[5] = _mm_srai_epi32(v[5], bit); - - u[0] = _mm_mullo_epi32(u[5], cospi32); - v[6] = _mm_mullo_epi32(u[6], cospim32); - v[6] = _mm_sub_epi32(u[0], v[6]); - v[6] = _mm_add_epi32(v[6], rnding); - v[6] = _mm_srai_epi32(v[6], bit); - - // stage 3 - // type 0 - v[0] = _mm_mullo_epi32(v[0], cospi32); - v[1] = _mm_mullo_epi32(v[1], cospi32); - u[0] = _mm_add_epi32(v[0], v[1]); - u[0] = _mm_add_epi32(u[0], rnding); - u[0] = _mm_srai_epi32(u[0], bit); - - u[1] = _mm_sub_epi32(v[0], v[1]); - u[1] = _mm_add_epi32(u[1], rnding); - u[1] = _mm_srai_epi32(u[1], bit); - - // type 1 - v[0] = _mm_mullo_epi32(v[2], cospi48); - v[1] = _mm_mullo_epi32(v[3], cospi16); - u[2] = _mm_add_epi32(v[0], v[1]); - u[2] = _mm_add_epi32(u[2], rnding); - u[2] = _mm_srai_epi32(u[2], bit); - - v[0] = _mm_mullo_epi32(v[2], cospi16); - v[1] = _mm_mullo_epi32(v[3], cospi48); - u[3] = _mm_sub_epi32(v[1], v[0]); - u[3] = _mm_add_epi32(u[3], rnding); - u[3] = _mm_srai_epi32(u[3], bit); - - u[4] = _mm_add_epi32(v[4], v[5]); - u[5] = _mm_sub_epi32(v[4], v[5]); - u[6] = _mm_sub_epi32(v[7], v[6]); - u[7] = _mm_add_epi32(v[7], v[6]); - - // stage 4 - // stage 5 - v[0] = _mm_mullo_epi32(u[4], cospi56); - v[1] = _mm_mullo_epi32(u[7], cospi8); - v[0] = _mm_add_epi32(v[0], v[1]); - v[0] = _mm_add_epi32(v[0], 
rnding); - out[3] = _mm_srai_epi32(v[0], bit); // buf0[4] - - v[0] = _mm_mullo_epi32(u[4], cospi8); - v[1] = _mm_mullo_epi32(u[7], cospi56); - v[0] = _mm_sub_epi32(v[1], v[0]); - v[0] = _mm_add_epi32(v[0], rnding); - out[15] = _mm_srai_epi32(v[0], bit); // buf0[7] - - v[0] = _mm_mullo_epi32(u[5], cospi24); - v[1] = _mm_mullo_epi32(u[6], cospi40); - v[0] = _mm_add_epi32(v[0], v[1]); - v[0] = _mm_add_epi32(v[0], rnding); - out[11] = _mm_srai_epi32(v[0], bit); // buf0[5] - - v[0] = _mm_mullo_epi32(u[5], cospi40); - v[1] = _mm_mullo_epi32(u[6], cospi24); - v[0] = _mm_sub_epi32(v[1], v[0]); - v[0] = _mm_add_epi32(v[0], rnding); - out[7] = _mm_srai_epi32(v[0], bit); // buf0[6] - - out[1] = u[0]; // buf0[0] - out[9] = u[1]; // buf0[1] - out[5] = u[2]; // buf0[2] - out[13] = u[3]; // buf0[3] -} - -static void fadst8x8_sse4_1(__m128i *in, __m128i *out, int bit, - const int col_num) { - (void)(col_num); - const int32_t *cospi = cospi_arr(bit); - const __m128i cospi32 = _mm_set1_epi32(cospi[32]); - const __m128i cospi16 = _mm_set1_epi32(cospi[16]); - const __m128i cospim16 = _mm_set1_epi32(-cospi[16]); - const __m128i cospi48 = _mm_set1_epi32(cospi[48]); - const __m128i cospim48 = _mm_set1_epi32(-cospi[48]); - const __m128i cospi4 = _mm_set1_epi32(cospi[4]); - const __m128i cospim4 = _mm_set1_epi32(-cospi[4]); - const __m128i cospi60 = _mm_set1_epi32(cospi[60]); - const __m128i cospi20 = _mm_set1_epi32(cospi[20]); - const __m128i cospim20 = _mm_set1_epi32(-cospi[20]); - const __m128i cospi44 = _mm_set1_epi32(cospi[44]); - const __m128i cospi28 = _mm_set1_epi32(cospi[28]); - const __m128i cospi36 = _mm_set1_epi32(cospi[36]); - const __m128i cospim36 = _mm_set1_epi32(-cospi[36]); - const __m128i cospi52 = _mm_set1_epi32(cospi[52]); - const __m128i cospim52 = _mm_set1_epi32(-cospi[52]); - const __m128i cospi12 = _mm_set1_epi32(cospi[12]); - const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); - const __m128i zero = _mm_setzero_si128(); - __m128i u0, u1, u2, u3, u4, u5, u6, u7; 
- __m128i v0, v1, v2, v3, v4, v5, v6, v7; - __m128i x, y; - int col; - - // Note: - // Even column: 0, 2, ..., 14 - // Odd column: 1, 3, ..., 15 - // one even column plus one odd column constructs one row (8 coeffs) - // total we have 8 rows (8x8). - for (col = 0; col < 2; ++col) { - // stage 0 - // stage 1 - u0 = in[2 * 0 + col]; - u1 = _mm_sub_epi32(zero, in[2 * 7 + col]); - u2 = _mm_sub_epi32(zero, in[2 * 3 + col]); - u3 = in[2 * 4 + col]; - u4 = _mm_sub_epi32(zero, in[2 * 1 + col]); - u5 = in[2 * 6 + col]; - u6 = in[2 * 2 + col]; - u7 = _mm_sub_epi32(zero, in[2 * 5 + col]); - - // stage 2 - v0 = u0; - v1 = u1; - - x = _mm_mullo_epi32(u2, cospi32); - y = _mm_mullo_epi32(u3, cospi32); - v2 = _mm_add_epi32(x, y); - v2 = _mm_add_epi32(v2, rnding); - v2 = _mm_srai_epi32(v2, bit); - - v3 = _mm_sub_epi32(x, y); - v3 = _mm_add_epi32(v3, rnding); - v3 = _mm_srai_epi32(v3, bit); - - v4 = u4; - v5 = u5; - - x = _mm_mullo_epi32(u6, cospi32); - y = _mm_mullo_epi32(u7, cospi32); - v6 = _mm_add_epi32(x, y); - v6 = _mm_add_epi32(v6, rnding); - v6 = _mm_srai_epi32(v6, bit); - - v7 = _mm_sub_epi32(x, y); - v7 = _mm_add_epi32(v7, rnding); - v7 = _mm_srai_epi32(v7, bit); - - // stage 3 - u0 = _mm_add_epi32(v0, v2); - u1 = _mm_add_epi32(v1, v3); - u2 = _mm_sub_epi32(v0, v2); - u3 = _mm_sub_epi32(v1, v3); - u4 = _mm_add_epi32(v4, v6); - u5 = _mm_add_epi32(v5, v7); - u6 = _mm_sub_epi32(v4, v6); - u7 = _mm_sub_epi32(v5, v7); - - // stage 4 - v0 = u0; - v1 = u1; - v2 = u2; - v3 = u3; - - x = _mm_mullo_epi32(u4, cospi16); - y = _mm_mullo_epi32(u5, cospi48); - v4 = _mm_add_epi32(x, y); - v4 = _mm_add_epi32(v4, rnding); - v4 = _mm_srai_epi32(v4, bit); - - x = _mm_mullo_epi32(u4, cospi48); - y = _mm_mullo_epi32(u5, cospim16); - v5 = _mm_add_epi32(x, y); - v5 = _mm_add_epi32(v5, rnding); - v5 = _mm_srai_epi32(v5, bit); - - x = _mm_mullo_epi32(u6, cospim48); - y = _mm_mullo_epi32(u7, cospi16); - v6 = _mm_add_epi32(x, y); - v6 = _mm_add_epi32(v6, rnding); - v6 = _mm_srai_epi32(v6, bit); - - x 
= _mm_mullo_epi32(u6, cospi16); - y = _mm_mullo_epi32(u7, cospi48); - v7 = _mm_add_epi32(x, y); - v7 = _mm_add_epi32(v7, rnding); - v7 = _mm_srai_epi32(v7, bit); - - // stage 5 - u0 = _mm_add_epi32(v0, v4); - u1 = _mm_add_epi32(v1, v5); - u2 = _mm_add_epi32(v2, v6); - u3 = _mm_add_epi32(v3, v7); - u4 = _mm_sub_epi32(v0, v4); - u5 = _mm_sub_epi32(v1, v5); - u6 = _mm_sub_epi32(v2, v6); - u7 = _mm_sub_epi32(v3, v7); - - // stage 6 - x = _mm_mullo_epi32(u0, cospi4); - y = _mm_mullo_epi32(u1, cospi60); - v0 = _mm_add_epi32(x, y); - v0 = _mm_add_epi32(v0, rnding); - v0 = _mm_srai_epi32(v0, bit); - - x = _mm_mullo_epi32(u0, cospi60); - y = _mm_mullo_epi32(u1, cospim4); - v1 = _mm_add_epi32(x, y); - v1 = _mm_add_epi32(v1, rnding); - v1 = _mm_srai_epi32(v1, bit); - - x = _mm_mullo_epi32(u2, cospi20); - y = _mm_mullo_epi32(u3, cospi44); - v2 = _mm_add_epi32(x, y); - v2 = _mm_add_epi32(v2, rnding); - v2 = _mm_srai_epi32(v2, bit); - - x = _mm_mullo_epi32(u2, cospi44); - y = _mm_mullo_epi32(u3, cospim20); - v3 = _mm_add_epi32(x, y); - v3 = _mm_add_epi32(v3, rnding); - v3 = _mm_srai_epi32(v3, bit); - - x = _mm_mullo_epi32(u4, cospi36); - y = _mm_mullo_epi32(u5, cospi28); - v4 = _mm_add_epi32(x, y); - v4 = _mm_add_epi32(v4, rnding); - v4 = _mm_srai_epi32(v4, bit); - - x = _mm_mullo_epi32(u4, cospi28); - y = _mm_mullo_epi32(u5, cospim36); - v5 = _mm_add_epi32(x, y); - v5 = _mm_add_epi32(v5, rnding); - v5 = _mm_srai_epi32(v5, bit); - - x = _mm_mullo_epi32(u6, cospi52); - y = _mm_mullo_epi32(u7, cospi12); - v6 = _mm_add_epi32(x, y); - v6 = _mm_add_epi32(v6, rnding); - v6 = _mm_srai_epi32(v6, bit); - - x = _mm_mullo_epi32(u6, cospi12); - y = _mm_mullo_epi32(u7, cospim52); - v7 = _mm_add_epi32(x, y); - v7 = _mm_add_epi32(v7, rnding); - v7 = _mm_srai_epi32(v7, bit); - - // stage 7 - out[2 * 0 + col] = v1; - out[2 * 1 + col] = v6; - out[2 * 2 + col] = v3; - out[2 * 3 + col] = v4; - out[2 * 4 + col] = v5; - out[2 * 5 + col] = v2; - out[2 * 6 + col] = v7; - out[2 * 7 + col] = v0; - } -} - 
-void av1_fwd_txfm2d_8x8_sse4_1(const int16_t *input, int32_t *coeff, int stride, - TX_TYPE tx_type, int bd) { - __m128i in[16], out[16]; - const int8_t *shift = fwd_txfm_shift_ls[TX_8X8]; - const int txw_idx = get_txw_idx(TX_8X8); - const int txh_idx = get_txh_idx(TX_8X8); - - switch (tx_type) { - case DCT_DCT: - load_buffer_8x8(input, in, stride, 0, 0, shift[0]); - fdct8x8_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], 0); - col_txfm_8x8_rounding(out, -shift[1]); - transpose_8x8(out, in); - fdct8x8_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], 0); - transpose_8x8(out, in); - write_buffer_8x8(in, coeff); - break; - case ADST_DCT: - load_buffer_8x8(input, in, stride, 0, 0, shift[0]); - fadst8x8_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], 0); - col_txfm_8x8_rounding(out, -shift[1]); - transpose_8x8(out, in); - fdct8x8_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], 0); - transpose_8x8(out, in); - write_buffer_8x8(in, coeff); - break; - case DCT_ADST: - load_buffer_8x8(input, in, stride, 0, 0, shift[0]); - fdct8x8_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], 0); - col_txfm_8x8_rounding(out, -shift[1]); - transpose_8x8(out, in); - fadst8x8_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], 0); - transpose_8x8(out, in); - write_buffer_8x8(in, coeff); - break; - case ADST_ADST: - load_buffer_8x8(input, in, stride, 0, 0, shift[0]); - fadst8x8_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], 0); - col_txfm_8x8_rounding(out, -shift[1]); - transpose_8x8(out, in); - fadst8x8_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], 0); - transpose_8x8(out, in); - write_buffer_8x8(in, coeff); - break; - case FLIPADST_DCT: - load_buffer_8x8(input, in, stride, 1, 0, shift[0]); - fadst8x8_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], 0); - col_txfm_8x8_rounding(out, -shift[1]); - transpose_8x8(out, in); - fdct8x8_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], 0); - transpose_8x8(out, in); - write_buffer_8x8(in, coeff); - break; - case DCT_FLIPADST: - 
load_buffer_8x8(input, in, stride, 0, 1, shift[0]); - fdct8x8_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], 0); - col_txfm_8x8_rounding(out, -shift[1]); - transpose_8x8(out, in); - fadst8x8_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], 0); - transpose_8x8(out, in); - write_buffer_8x8(in, coeff); - break; - case FLIPADST_FLIPADST: - load_buffer_8x8(input, in, stride, 1, 1, shift[0]); - fadst8x8_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], 0); - col_txfm_8x8_rounding(out, -shift[1]); - transpose_8x8(out, in); - fadst8x8_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], 0); - transpose_8x8(out, in); - write_buffer_8x8(in, coeff); - break; - case ADST_FLIPADST: - load_buffer_8x8(input, in, stride, 0, 1, shift[0]); - fadst8x8_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], 0); - col_txfm_8x8_rounding(out, -shift[1]); - transpose_8x8(out, in); - fadst8x8_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], 0); - transpose_8x8(out, in); - write_buffer_8x8(in, coeff); - break; - case FLIPADST_ADST: - load_buffer_8x8(input, in, stride, 1, 0, shift[0]); - fadst8x8_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], 0); - col_txfm_8x8_rounding(out, -shift[1]); - transpose_8x8(out, in); - fadst8x8_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], 0); - transpose_8x8(out, in); - write_buffer_8x8(in, coeff); - break; - default: assert(0); - } - (void)bd; -} - -// Hybrid Transform 16x16 - -static INLINE void convert_8x8_to_16x16(const __m128i *in, __m128i *out) { - int row_index = 0; - int dst_index = 0; - int src_index = 0; - - // row 0, 1, .., 7 - do { - out[dst_index] = in[src_index]; - out[dst_index + 1] = in[src_index + 1]; - out[dst_index + 2] = in[src_index + 16]; - out[dst_index + 3] = in[src_index + 17]; - dst_index += 4; - src_index += 2; - row_index += 1; - } while (row_index < 8); - - // row 8, 9, ..., 15 - src_index += 16; - do { - out[dst_index] = in[src_index]; - out[dst_index + 1] = in[src_index + 1]; - out[dst_index + 2] = in[src_index + 16]; - 
out[dst_index + 3] = in[src_index + 17]; - dst_index += 4; - src_index += 2; - row_index += 1; - } while (row_index < 16); -} - -static INLINE void load_buffer_16x16(const int16_t *input, __m128i *out, - int stride, int flipud, int fliplr, - int shift) { - __m128i in[64]; - // Load 4 8x8 blocks - const int16_t *topL = input; - const int16_t *topR = input + 8; - const int16_t *botL = input + 8 * stride; - const int16_t *botR = input + 8 * stride + 8; - - const int16_t *tmp; - - if (flipud) { - // Swap left columns - tmp = topL; - topL = botL; - botL = tmp; - // Swap right columns - tmp = topR; - topR = botR; - botR = tmp; - } - - if (fliplr) { - // Swap top rows - tmp = topL; - topL = topR; - topR = tmp; - // Swap bottom rows - tmp = botL; - botL = botR; - botR = tmp; - } - - // load first 8 columns - load_buffer_8x8(topL, &in[0], stride, flipud, fliplr, shift); - load_buffer_8x8(botL, &in[32], stride, flipud, fliplr, shift); - - // load second 8 columns - load_buffer_8x8(topR, &in[16], stride, flipud, fliplr, shift); - load_buffer_8x8(botR, &in[48], stride, flipud, fliplr, shift); - - convert_8x8_to_16x16(in, out); -} - -static INLINE void load_buffer_8x16(const int16_t *input, __m128i *out, - int stride, int flipud, int fliplr, - int shift) { - const int16_t *topL = input; - const int16_t *botL = input + 8 * stride; - - const int16_t *tmp; - - if (flipud) { - tmp = topL; - topL = botL; - botL = tmp; - } - - load_buffer_8x8(topL, out, stride, flipud, fliplr, shift); - load_buffer_8x8(botL, out + 16, stride, flipud, fliplr, shift); -} - -static void fdct16x16_sse4_1(__m128i *in, __m128i *out, int bit, - const int col_num) { - const int32_t *cospi = cospi_arr(bit); - const __m128i cospi32 = _mm_set1_epi32(cospi[32]); - const __m128i cospim32 = _mm_set1_epi32(-cospi[32]); - const __m128i cospi48 = _mm_set1_epi32(cospi[48]); - const __m128i cospi16 = _mm_set1_epi32(cospi[16]); - const __m128i cospim48 = _mm_set1_epi32(-cospi[48]); - const __m128i cospim16 = 
_mm_set1_epi32(-cospi[16]); - const __m128i cospi56 = _mm_set1_epi32(cospi[56]); - const __m128i cospi8 = _mm_set1_epi32(cospi[8]); - const __m128i cospi24 = _mm_set1_epi32(cospi[24]); - const __m128i cospi40 = _mm_set1_epi32(cospi[40]); - const __m128i cospi60 = _mm_set1_epi32(cospi[60]); - const __m128i cospi4 = _mm_set1_epi32(cospi[4]); - const __m128i cospi28 = _mm_set1_epi32(cospi[28]); - const __m128i cospi36 = _mm_set1_epi32(cospi[36]); - const __m128i cospi44 = _mm_set1_epi32(cospi[44]); - const __m128i cospi20 = _mm_set1_epi32(cospi[20]); - const __m128i cospi12 = _mm_set1_epi32(cospi[12]); - const __m128i cospi52 = _mm_set1_epi32(cospi[52]); - const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); - __m128i u[16], v[16], x; - int col; - - // Calculate the column 0, 1, 2, 3 - for (col = 0; col < col_num; ++col) { - // stage 0 - // stage 1 - u[0] = _mm_add_epi32(in[0 * col_num + col], in[15 * col_num + col]); - u[15] = _mm_sub_epi32(in[0 * col_num + col], in[15 * col_num + col]); - u[1] = _mm_add_epi32(in[1 * col_num + col], in[14 * col_num + col]); - u[14] = _mm_sub_epi32(in[1 * col_num + col], in[14 * col_num + col]); - u[2] = _mm_add_epi32(in[2 * col_num + col], in[13 * col_num + col]); - u[13] = _mm_sub_epi32(in[2 * col_num + col], in[13 * col_num + col]); - u[3] = _mm_add_epi32(in[3 * col_num + col], in[12 * col_num + col]); - u[12] = _mm_sub_epi32(in[3 * col_num + col], in[12 * col_num + col]); - u[4] = _mm_add_epi32(in[4 * col_num + col], in[11 * col_num + col]); - u[11] = _mm_sub_epi32(in[4 * col_num + col], in[11 * col_num + col]); - u[5] = _mm_add_epi32(in[5 * col_num + col], in[10 * col_num + col]); - u[10] = _mm_sub_epi32(in[5 * col_num + col], in[10 * col_num + col]); - u[6] = _mm_add_epi32(in[6 * col_num + col], in[9 * col_num + col]); - u[9] = _mm_sub_epi32(in[6 * col_num + col], in[9 * col_num + col]); - u[7] = _mm_add_epi32(in[7 * col_num + col], in[8 * col_num + col]); - u[8] = _mm_sub_epi32(in[7 * col_num + col], in[8 * col_num + col]); - 
- // stage 2 - v[0] = _mm_add_epi32(u[0], u[7]); - v[7] = _mm_sub_epi32(u[0], u[7]); - v[1] = _mm_add_epi32(u[1], u[6]); - v[6] = _mm_sub_epi32(u[1], u[6]); - v[2] = _mm_add_epi32(u[2], u[5]); - v[5] = _mm_sub_epi32(u[2], u[5]); - v[3] = _mm_add_epi32(u[3], u[4]); - v[4] = _mm_sub_epi32(u[3], u[4]); - v[8] = u[8]; - v[9] = u[9]; - - v[10] = _mm_mullo_epi32(u[10], cospim32); - x = _mm_mullo_epi32(u[13], cospi32); - v[10] = _mm_add_epi32(v[10], x); - v[10] = _mm_add_epi32(v[10], rnding); - v[10] = _mm_srai_epi32(v[10], bit); - - v[13] = _mm_mullo_epi32(u[10], cospi32); - x = _mm_mullo_epi32(u[13], cospim32); - v[13] = _mm_sub_epi32(v[13], x); - v[13] = _mm_add_epi32(v[13], rnding); - v[13] = _mm_srai_epi32(v[13], bit); - - v[11] = _mm_mullo_epi32(u[11], cospim32); - x = _mm_mullo_epi32(u[12], cospi32); - v[11] = _mm_add_epi32(v[11], x); - v[11] = _mm_add_epi32(v[11], rnding); - v[11] = _mm_srai_epi32(v[11], bit); - - v[12] = _mm_mullo_epi32(u[11], cospi32); - x = _mm_mullo_epi32(u[12], cospim32); - v[12] = _mm_sub_epi32(v[12], x); - v[12] = _mm_add_epi32(v[12], rnding); - v[12] = _mm_srai_epi32(v[12], bit); - v[14] = u[14]; - v[15] = u[15]; - - // stage 3 - u[0] = _mm_add_epi32(v[0], v[3]); - u[3] = _mm_sub_epi32(v[0], v[3]); - u[1] = _mm_add_epi32(v[1], v[2]); - u[2] = _mm_sub_epi32(v[1], v[2]); - u[4] = v[4]; - - u[5] = _mm_mullo_epi32(v[5], cospim32); - x = _mm_mullo_epi32(v[6], cospi32); - u[5] = _mm_add_epi32(u[5], x); - u[5] = _mm_add_epi32(u[5], rnding); - u[5] = _mm_srai_epi32(u[5], bit); - - u[6] = _mm_mullo_epi32(v[5], cospi32); - x = _mm_mullo_epi32(v[6], cospim32); - u[6] = _mm_sub_epi32(u[6], x); - u[6] = _mm_add_epi32(u[6], rnding); - u[6] = _mm_srai_epi32(u[6], bit); - - u[7] = v[7]; - u[8] = _mm_add_epi32(v[8], v[11]); - u[11] = _mm_sub_epi32(v[8], v[11]); - u[9] = _mm_add_epi32(v[9], v[10]); - u[10] = _mm_sub_epi32(v[9], v[10]); - u[12] = _mm_sub_epi32(v[15], v[12]); - u[15] = _mm_add_epi32(v[15], v[12]); - u[13] = _mm_sub_epi32(v[14], v[13]); - 
u[14] = _mm_add_epi32(v[14], v[13]); - - // stage 4 - u[0] = _mm_mullo_epi32(u[0], cospi32); - u[1] = _mm_mullo_epi32(u[1], cospi32); - v[0] = _mm_add_epi32(u[0], u[1]); - v[0] = _mm_add_epi32(v[0], rnding); - v[0] = _mm_srai_epi32(v[0], bit); - - v[1] = _mm_sub_epi32(u[0], u[1]); - v[1] = _mm_add_epi32(v[1], rnding); - v[1] = _mm_srai_epi32(v[1], bit); - - v[2] = _mm_mullo_epi32(u[2], cospi48); - x = _mm_mullo_epi32(u[3], cospi16); - v[2] = _mm_add_epi32(v[2], x); - v[2] = _mm_add_epi32(v[2], rnding); - v[2] = _mm_srai_epi32(v[2], bit); - - v[3] = _mm_mullo_epi32(u[2], cospi16); - x = _mm_mullo_epi32(u[3], cospi48); - v[3] = _mm_sub_epi32(x, v[3]); - v[3] = _mm_add_epi32(v[3], rnding); - v[3] = _mm_srai_epi32(v[3], bit); - - v[4] = _mm_add_epi32(u[4], u[5]); - v[5] = _mm_sub_epi32(u[4], u[5]); - v[6] = _mm_sub_epi32(u[7], u[6]); - v[7] = _mm_add_epi32(u[7], u[6]); - v[8] = u[8]; - - v[9] = _mm_mullo_epi32(u[9], cospim16); - x = _mm_mullo_epi32(u[14], cospi48); - v[9] = _mm_add_epi32(v[9], x); - v[9] = _mm_add_epi32(v[9], rnding); - v[9] = _mm_srai_epi32(v[9], bit); - - v[14] = _mm_mullo_epi32(u[9], cospi48); - x = _mm_mullo_epi32(u[14], cospim16); - v[14] = _mm_sub_epi32(v[14], x); - v[14] = _mm_add_epi32(v[14], rnding); - v[14] = _mm_srai_epi32(v[14], bit); - - v[10] = _mm_mullo_epi32(u[10], cospim48); - x = _mm_mullo_epi32(u[13], cospim16); - v[10] = _mm_add_epi32(v[10], x); - v[10] = _mm_add_epi32(v[10], rnding); - v[10] = _mm_srai_epi32(v[10], bit); - - v[13] = _mm_mullo_epi32(u[10], cospim16); - x = _mm_mullo_epi32(u[13], cospim48); - v[13] = _mm_sub_epi32(v[13], x); - v[13] = _mm_add_epi32(v[13], rnding); - v[13] = _mm_srai_epi32(v[13], bit); - - v[11] = u[11]; - v[12] = u[12]; - v[15] = u[15]; - - // stage 5 - u[0] = v[0]; - u[1] = v[1]; - u[2] = v[2]; - u[3] = v[3]; - - u[4] = _mm_mullo_epi32(v[4], cospi56); - x = _mm_mullo_epi32(v[7], cospi8); - u[4] = _mm_add_epi32(u[4], x); - u[4] = _mm_add_epi32(u[4], rnding); - u[4] = _mm_srai_epi32(u[4], bit); - - 
u[7] = _mm_mullo_epi32(v[4], cospi8); - x = _mm_mullo_epi32(v[7], cospi56); - u[7] = _mm_sub_epi32(x, u[7]); - u[7] = _mm_add_epi32(u[7], rnding); - u[7] = _mm_srai_epi32(u[7], bit); - - u[5] = _mm_mullo_epi32(v[5], cospi24); - x = _mm_mullo_epi32(v[6], cospi40); - u[5] = _mm_add_epi32(u[5], x); - u[5] = _mm_add_epi32(u[5], rnding); - u[5] = _mm_srai_epi32(u[5], bit); - - u[6] = _mm_mullo_epi32(v[5], cospi40); - x = _mm_mullo_epi32(v[6], cospi24); - u[6] = _mm_sub_epi32(x, u[6]); - u[6] = _mm_add_epi32(u[6], rnding); - u[6] = _mm_srai_epi32(u[6], bit); - - u[8] = _mm_add_epi32(v[8], v[9]); - u[9] = _mm_sub_epi32(v[8], v[9]); - u[10] = _mm_sub_epi32(v[11], v[10]); - u[11] = _mm_add_epi32(v[11], v[10]); - u[12] = _mm_add_epi32(v[12], v[13]); - u[13] = _mm_sub_epi32(v[12], v[13]); - u[14] = _mm_sub_epi32(v[15], v[14]); - u[15] = _mm_add_epi32(v[15], v[14]); - - // stage 6 - v[0] = u[0]; - v[1] = u[1]; - v[2] = u[2]; - v[3] = u[3]; - v[4] = u[4]; - v[5] = u[5]; - v[6] = u[6]; - v[7] = u[7]; - - v[8] = _mm_mullo_epi32(u[8], cospi60); - x = _mm_mullo_epi32(u[15], cospi4); - v[8] = _mm_add_epi32(v[8], x); - v[8] = _mm_add_epi32(v[8], rnding); - v[8] = _mm_srai_epi32(v[8], bit); - - v[15] = _mm_mullo_epi32(u[8], cospi4); - x = _mm_mullo_epi32(u[15], cospi60); - v[15] = _mm_sub_epi32(x, v[15]); - v[15] = _mm_add_epi32(v[15], rnding); - v[15] = _mm_srai_epi32(v[15], bit); - - v[9] = _mm_mullo_epi32(u[9], cospi28); - x = _mm_mullo_epi32(u[14], cospi36); - v[9] = _mm_add_epi32(v[9], x); - v[9] = _mm_add_epi32(v[9], rnding); - v[9] = _mm_srai_epi32(v[9], bit); - - v[14] = _mm_mullo_epi32(u[9], cospi36); - x = _mm_mullo_epi32(u[14], cospi28); - v[14] = _mm_sub_epi32(x, v[14]); - v[14] = _mm_add_epi32(v[14], rnding); - v[14] = _mm_srai_epi32(v[14], bit); - - v[10] = _mm_mullo_epi32(u[10], cospi44); - x = _mm_mullo_epi32(u[13], cospi20); - v[10] = _mm_add_epi32(v[10], x); - v[10] = _mm_add_epi32(v[10], rnding); - v[10] = _mm_srai_epi32(v[10], bit); - - v[13] = 
_mm_mullo_epi32(u[10], cospi20); - x = _mm_mullo_epi32(u[13], cospi44); - v[13] = _mm_sub_epi32(x, v[13]); - v[13] = _mm_add_epi32(v[13], rnding); - v[13] = _mm_srai_epi32(v[13], bit); - - v[11] = _mm_mullo_epi32(u[11], cospi12); - x = _mm_mullo_epi32(u[12], cospi52); - v[11] = _mm_add_epi32(v[11], x); - v[11] = _mm_add_epi32(v[11], rnding); - v[11] = _mm_srai_epi32(v[11], bit); - - v[12] = _mm_mullo_epi32(u[11], cospi52); - x = _mm_mullo_epi32(u[12], cospi12); - v[12] = _mm_sub_epi32(x, v[12]); - v[12] = _mm_add_epi32(v[12], rnding); - v[12] = _mm_srai_epi32(v[12], bit); - - out[0 * col_num + col] = v[0]; - out[1 * col_num + col] = v[8]; - out[2 * col_num + col] = v[4]; - out[3 * col_num + col] = v[12]; - out[4 * col_num + col] = v[2]; - out[5 * col_num + col] = v[10]; - out[6 * col_num + col] = v[6]; - out[7 * col_num + col] = v[14]; - out[8 * col_num + col] = v[1]; - out[9 * col_num + col] = v[9]; - out[10 * col_num + col] = v[5]; - out[11 * col_num + col] = v[13]; - out[12 * col_num + col] = v[3]; - out[13 * col_num + col] = v[11]; - out[14 * col_num + col] = v[7]; - out[15 * col_num + col] = v[15]; - } -} - -static void fadst16x16_sse4_1(__m128i *in, __m128i *out, int bit, - const int num_cols) { - const int32_t *cospi = cospi_arr(bit); - const __m128i cospi32 = _mm_set1_epi32(cospi[32]); - const __m128i cospi48 = _mm_set1_epi32(cospi[48]); - const __m128i cospi16 = _mm_set1_epi32(cospi[16]); - const __m128i cospim16 = _mm_set1_epi32(-cospi[16]); - const __m128i cospim48 = _mm_set1_epi32(-cospi[48]); - const __m128i cospi8 = _mm_set1_epi32(cospi[8]); - const __m128i cospi56 = _mm_set1_epi32(cospi[56]); - const __m128i cospim56 = _mm_set1_epi32(-cospi[56]); - const __m128i cospim8 = _mm_set1_epi32(-cospi[8]); - const __m128i cospi24 = _mm_set1_epi32(cospi[24]); - const __m128i cospim24 = _mm_set1_epi32(-cospi[24]); - const __m128i cospim40 = _mm_set1_epi32(-cospi[40]); - const __m128i cospi40 = _mm_set1_epi32(cospi[40]); - const __m128i cospi2 = 
_mm_set1_epi32(cospi[2]); - const __m128i cospi62 = _mm_set1_epi32(cospi[62]); - const __m128i cospim2 = _mm_set1_epi32(-cospi[2]); - const __m128i cospi10 = _mm_set1_epi32(cospi[10]); - const __m128i cospi54 = _mm_set1_epi32(cospi[54]); - const __m128i cospim10 = _mm_set1_epi32(-cospi[10]); - const __m128i cospi18 = _mm_set1_epi32(cospi[18]); - const __m128i cospi46 = _mm_set1_epi32(cospi[46]); - const __m128i cospim18 = _mm_set1_epi32(-cospi[18]); - const __m128i cospi26 = _mm_set1_epi32(cospi[26]); - const __m128i cospi38 = _mm_set1_epi32(cospi[38]); - const __m128i cospim26 = _mm_set1_epi32(-cospi[26]); - const __m128i cospi34 = _mm_set1_epi32(cospi[34]); - const __m128i cospi30 = _mm_set1_epi32(cospi[30]); - const __m128i cospim34 = _mm_set1_epi32(-cospi[34]); - const __m128i cospi42 = _mm_set1_epi32(cospi[42]); - const __m128i cospi22 = _mm_set1_epi32(cospi[22]); - const __m128i cospim42 = _mm_set1_epi32(-cospi[42]); - const __m128i cospi50 = _mm_set1_epi32(cospi[50]); - const __m128i cospi14 = _mm_set1_epi32(cospi[14]); - const __m128i cospim50 = _mm_set1_epi32(-cospi[50]); - const __m128i cospi58 = _mm_set1_epi32(cospi[58]); - const __m128i cospi6 = _mm_set1_epi32(cospi[6]); - const __m128i cospim58 = _mm_set1_epi32(-cospi[58]); - const __m128i rnding = _mm_set1_epi32(1 << (bit - 1)); - const __m128i zero = _mm_setzero_si128(); - - __m128i u[16], v[16], x, y; - int col; - - for (col = 0; col < num_cols; ++col) { - // stage 0 - // stage 1 - u[0] = in[0 * num_cols + col]; - u[1] = _mm_sub_epi32(zero, in[15 * num_cols + col]); - u[2] = _mm_sub_epi32(zero, in[7 * num_cols + col]); - u[3] = in[8 * num_cols + col]; - u[4] = _mm_sub_epi32(zero, in[3 * num_cols + col]); - u[5] = in[12 * num_cols + col]; - u[6] = in[4 * num_cols + col]; - u[7] = _mm_sub_epi32(zero, in[11 * num_cols + col]); - u[8] = _mm_sub_epi32(zero, in[1 * num_cols + col]); - u[9] = in[14 * num_cols + col]; - u[10] = in[6 * num_cols + col]; - u[11] = _mm_sub_epi32(zero, in[9 * num_cols + col]); - 
u[12] = in[2 * num_cols + col]; - u[13] = _mm_sub_epi32(zero, in[13 * num_cols + col]); - u[14] = _mm_sub_epi32(zero, in[5 * num_cols + col]); - u[15] = in[10 * num_cols + col]; - - // stage 2 - v[0] = u[0]; - v[1] = u[1]; - - x = _mm_mullo_epi32(u[2], cospi32); - y = _mm_mullo_epi32(u[3], cospi32); - v[2] = _mm_add_epi32(x, y); - v[2] = _mm_add_epi32(v[2], rnding); - v[2] = _mm_srai_epi32(v[2], bit); - - v[3] = _mm_sub_epi32(x, y); - v[3] = _mm_add_epi32(v[3], rnding); - v[3] = _mm_srai_epi32(v[3], bit); - - v[4] = u[4]; - v[5] = u[5]; - - x = _mm_mullo_epi32(u[6], cospi32); - y = _mm_mullo_epi32(u[7], cospi32); - v[6] = _mm_add_epi32(x, y); - v[6] = _mm_add_epi32(v[6], rnding); - v[6] = _mm_srai_epi32(v[6], bit); - - v[7] = _mm_sub_epi32(x, y); - v[7] = _mm_add_epi32(v[7], rnding); - v[7] = _mm_srai_epi32(v[7], bit); - - v[8] = u[8]; - v[9] = u[9]; - - x = _mm_mullo_epi32(u[10], cospi32); - y = _mm_mullo_epi32(u[11], cospi32); - v[10] = _mm_add_epi32(x, y); - v[10] = _mm_add_epi32(v[10], rnding); - v[10] = _mm_srai_epi32(v[10], bit); - - v[11] = _mm_sub_epi32(x, y); - v[11] = _mm_add_epi32(v[11], rnding); - v[11] = _mm_srai_epi32(v[11], bit); - - v[12] = u[12]; - v[13] = u[13]; - - x = _mm_mullo_epi32(u[14], cospi32); - y = _mm_mullo_epi32(u[15], cospi32); - v[14] = _mm_add_epi32(x, y); - v[14] = _mm_add_epi32(v[14], rnding); - v[14] = _mm_srai_epi32(v[14], bit); - - v[15] = _mm_sub_epi32(x, y); - v[15] = _mm_add_epi32(v[15], rnding); - v[15] = _mm_srai_epi32(v[15], bit); - - // stage 3 - u[0] = _mm_add_epi32(v[0], v[2]); - u[1] = _mm_add_epi32(v[1], v[3]); - u[2] = _mm_sub_epi32(v[0], v[2]); - u[3] = _mm_sub_epi32(v[1], v[3]); - u[4] = _mm_add_epi32(v[4], v[6]); - u[5] = _mm_add_epi32(v[5], v[7]); - u[6] = _mm_sub_epi32(v[4], v[6]); - u[7] = _mm_sub_epi32(v[5], v[7]); - u[8] = _mm_add_epi32(v[8], v[10]); - u[9] = _mm_add_epi32(v[9], v[11]); - u[10] = _mm_sub_epi32(v[8], v[10]); - u[11] = _mm_sub_epi32(v[9], v[11]); - u[12] = _mm_add_epi32(v[12], v[14]); - u[13] 
= _mm_add_epi32(v[13], v[15]); - u[14] = _mm_sub_epi32(v[12], v[14]); - u[15] = _mm_sub_epi32(v[13], v[15]); - - // stage 4 - v[0] = u[0]; - v[1] = u[1]; - v[2] = u[2]; - v[3] = u[3]; - v[4] = half_btf_sse4_1(&cospi16, &u[4], &cospi48, &u[5], &rnding, bit); - v[5] = half_btf_sse4_1(&cospi48, &u[4], &cospim16, &u[5], &rnding, bit); - v[6] = half_btf_sse4_1(&cospim48, &u[6], &cospi16, &u[7], &rnding, bit); - v[7] = half_btf_sse4_1(&cospi16, &u[6], &cospi48, &u[7], &rnding, bit); - v[8] = u[8]; - v[9] = u[9]; - v[10] = u[10]; - v[11] = u[11]; - v[12] = half_btf_sse4_1(&cospi16, &u[12], &cospi48, &u[13], &rnding, bit); - v[13] = half_btf_sse4_1(&cospi48, &u[12], &cospim16, &u[13], &rnding, bit); - v[14] = half_btf_sse4_1(&cospim48, &u[14], &cospi16, &u[15], &rnding, bit); - v[15] = half_btf_sse4_1(&cospi16, &u[14], &cospi48, &u[15], &rnding, bit); - - // stage 5 - u[0] = _mm_add_epi32(v[0], v[4]); - u[1] = _mm_add_epi32(v[1], v[5]); - u[2] = _mm_add_epi32(v[2], v[6]); - u[3] = _mm_add_epi32(v[3], v[7]); - u[4] = _mm_sub_epi32(v[0], v[4]); - u[5] = _mm_sub_epi32(v[1], v[5]); - u[6] = _mm_sub_epi32(v[2], v[6]); - u[7] = _mm_sub_epi32(v[3], v[7]); - u[8] = _mm_add_epi32(v[8], v[12]); - u[9] = _mm_add_epi32(v[9], v[13]); - u[10] = _mm_add_epi32(v[10], v[14]); - u[11] = _mm_add_epi32(v[11], v[15]); - u[12] = _mm_sub_epi32(v[8], v[12]); - u[13] = _mm_sub_epi32(v[9], v[13]); - u[14] = _mm_sub_epi32(v[10], v[14]); - u[15] = _mm_sub_epi32(v[11], v[15]); - - // stage 6 - v[0] = u[0]; - v[1] = u[1]; - v[2] = u[2]; - v[3] = u[3]; - v[4] = u[4]; - v[5] = u[5]; - v[6] = u[6]; - v[7] = u[7]; - v[8] = half_btf_sse4_1(&cospi8, &u[8], &cospi56, &u[9], &rnding, bit); - v[9] = half_btf_sse4_1(&cospi56, &u[8], &cospim8, &u[9], &rnding, bit); - v[10] = half_btf_sse4_1(&cospi40, &u[10], &cospi24, &u[11], &rnding, bit); - v[11] = half_btf_sse4_1(&cospi24, &u[10], &cospim40, &u[11], &rnding, bit); - v[12] = half_btf_sse4_1(&cospim56, &u[12], &cospi8, &u[13], &rnding, bit); - v[13] = 
half_btf_sse4_1(&cospi8, &u[12], &cospi56, &u[13], &rnding, bit); - v[14] = half_btf_sse4_1(&cospim24, &u[14], &cospi40, &u[15], &rnding, bit); - v[15] = half_btf_sse4_1(&cospi40, &u[14], &cospi24, &u[15], &rnding, bit); - - // stage 7 - u[0] = _mm_add_epi32(v[0], v[8]); - u[1] = _mm_add_epi32(v[1], v[9]); - u[2] = _mm_add_epi32(v[2], v[10]); - u[3] = _mm_add_epi32(v[3], v[11]); - u[4] = _mm_add_epi32(v[4], v[12]); - u[5] = _mm_add_epi32(v[5], v[13]); - u[6] = _mm_add_epi32(v[6], v[14]); - u[7] = _mm_add_epi32(v[7], v[15]); - u[8] = _mm_sub_epi32(v[0], v[8]); - u[9] = _mm_sub_epi32(v[1], v[9]); - u[10] = _mm_sub_epi32(v[2], v[10]); - u[11] = _mm_sub_epi32(v[3], v[11]); - u[12] = _mm_sub_epi32(v[4], v[12]); - u[13] = _mm_sub_epi32(v[5], v[13]); - u[14] = _mm_sub_epi32(v[6], v[14]); - u[15] = _mm_sub_epi32(v[7], v[15]); - - // stage 8 - v[0] = half_btf_sse4_1(&cospi2, &u[0], &cospi62, &u[1], &rnding, bit); - v[1] = half_btf_sse4_1(&cospi62, &u[0], &cospim2, &u[1], &rnding, bit); - v[2] = half_btf_sse4_1(&cospi10, &u[2], &cospi54, &u[3], &rnding, bit); - v[3] = half_btf_sse4_1(&cospi54, &u[2], &cospim10, &u[3], &rnding, bit); - v[4] = half_btf_sse4_1(&cospi18, &u[4], &cospi46, &u[5], &rnding, bit); - v[5] = half_btf_sse4_1(&cospi46, &u[4], &cospim18, &u[5], &rnding, bit); - v[6] = half_btf_sse4_1(&cospi26, &u[6], &cospi38, &u[7], &rnding, bit); - v[7] = half_btf_sse4_1(&cospi38, &u[6], &cospim26, &u[7], &rnding, bit); - v[8] = half_btf_sse4_1(&cospi34, &u[8], &cospi30, &u[9], &rnding, bit); - v[9] = half_btf_sse4_1(&cospi30, &u[8], &cospim34, &u[9], &rnding, bit); - v[10] = half_btf_sse4_1(&cospi42, &u[10], &cospi22, &u[11], &rnding, bit); - v[11] = half_btf_sse4_1(&cospi22, &u[10], &cospim42, &u[11], &rnding, bit); - v[12] = half_btf_sse4_1(&cospi50, &u[12], &cospi14, &u[13], &rnding, bit); - v[13] = half_btf_sse4_1(&cospi14, &u[12], &cospim50, &u[13], &rnding, bit); - v[14] = half_btf_sse4_1(&cospi58, &u[14], &cospi6, &u[15], &rnding, bit); - v[15] = 
half_btf_sse4_1(&cospi6, &u[14], &cospim58, &u[15], &rnding, bit); - - // stage 9 - out[0 * num_cols + col] = v[1]; - out[1 * num_cols + col] = v[14]; - out[2 * num_cols + col] = v[3]; - out[3 * num_cols + col] = v[12]; - out[4 * num_cols + col] = v[5]; - out[5 * num_cols + col] = v[10]; - out[6 * num_cols + col] = v[7]; - out[7 * num_cols + col] = v[8]; - out[8 * num_cols + col] = v[9]; - out[9 * num_cols + col] = v[6]; - out[10 * num_cols + col] = v[11]; - out[11 * num_cols + col] = v[4]; - out[12 * num_cols + col] = v[13]; - out[13 * num_cols + col] = v[2]; - out[14 * num_cols + col] = v[15]; - out[15 * num_cols + col] = v[0]; - } -} - -static void col_txfm_16x16_rounding(__m128i *in, int shift) { - // Note: - // We split 16x16 rounding into 4 sections of 8x8 rounding, - // instead of 4 columns - col_txfm_8x8_rounding(&in[0], shift); - col_txfm_8x8_rounding(&in[16], shift); - col_txfm_8x8_rounding(&in[32], shift); - col_txfm_8x8_rounding(&in[48], shift); -} - -static void col_txfm_8x16_rounding(__m128i *in, int shift) { - col_txfm_8x8_rounding(&in[0], shift); - col_txfm_8x8_rounding(&in[16], shift); -} - -static void write_buffer_16x16(const __m128i *in, int32_t *output) { - const int size_8x8 = 16 * 4; - write_buffer_8x8(&in[0], output); - output += size_8x8; - write_buffer_8x8(&in[16], output); - output += size_8x8; - write_buffer_8x8(&in[32], output); - output += size_8x8; - write_buffer_8x8(&in[48], output); -} - -void av1_fwd_txfm2d_16x16_sse4_1(const int16_t *input, int32_t *coeff, - int stride, TX_TYPE tx_type, int bd) { - __m128i in[64], out[64]; - const int8_t *shift = fwd_txfm_shift_ls[TX_16X16]; - const int txw_idx = get_txw_idx(TX_16X16); - const int txh_idx = get_txh_idx(TX_16X16); - const int col_num = 4; - switch (tx_type) { - case DCT_DCT: - load_buffer_16x16(input, in, stride, 0, 0, shift[0]); - fdct16x16_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], col_num); - col_txfm_16x16_rounding(out, -shift[1]); - transpose_16x16(out, in); - 
fdct16x16_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], col_num); - transpose_16x16(out, in); - write_buffer_16x16(in, coeff); - break; - case ADST_DCT: - load_buffer_16x16(input, in, stride, 0, 0, shift[0]); - fadst16x16_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], col_num); - col_txfm_16x16_rounding(out, -shift[1]); - transpose_16x16(out, in); - fdct16x16_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], col_num); - transpose_16x16(out, in); - write_buffer_16x16(in, coeff); - break; - case DCT_ADST: - load_buffer_16x16(input, in, stride, 0, 0, shift[0]); - fdct16x16_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], col_num); - col_txfm_16x16_rounding(out, -shift[1]); - transpose_16x16(out, in); - fadst16x16_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], col_num); - transpose_16x16(out, in); - write_buffer_16x16(in, coeff); - break; - case ADST_ADST: - load_buffer_16x16(input, in, stride, 0, 0, shift[0]); - fadst16x16_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], col_num); - col_txfm_16x16_rounding(out, -shift[1]); - transpose_16x16(out, in); - fadst16x16_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], col_num); - transpose_16x16(out, in); - write_buffer_16x16(in, coeff); - break; - case FLIPADST_DCT: - load_buffer_16x16(input, in, stride, 1, 0, shift[0]); - fadst16x16_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], col_num); - col_txfm_16x16_rounding(out, -shift[1]); - transpose_16x16(out, in); - fdct16x16_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], col_num); - transpose_16x16(out, in); - write_buffer_16x16(in, coeff); - break; - case DCT_FLIPADST: - load_buffer_16x16(input, in, stride, 0, 1, shift[0]); - fdct16x16_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], col_num); - col_txfm_16x16_rounding(out, -shift[1]); - transpose_16x16(out, in); - fadst16x16_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], col_num); - transpose_16x16(out, in); - write_buffer_16x16(in, coeff); - break; - case FLIPADST_FLIPADST: - 
load_buffer_16x16(input, in, stride, 1, 1, shift[0]); - fadst16x16_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], col_num); - col_txfm_16x16_rounding(out, -shift[1]); - transpose_16x16(out, in); - fadst16x16_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], col_num); - transpose_16x16(out, in); - write_buffer_16x16(in, coeff); - break; - case ADST_FLIPADST: - load_buffer_16x16(input, in, stride, 0, 1, shift[0]); - fadst16x16_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], col_num); - col_txfm_16x16_rounding(out, -shift[1]); - transpose_16x16(out, in); - fadst16x16_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], col_num); - transpose_16x16(out, in); - write_buffer_16x16(in, coeff); - break; - case FLIPADST_ADST: - load_buffer_16x16(input, in, stride, 1, 0, shift[0]); - fadst16x16_sse4_1(in, out, fwd_cos_bit_col[txw_idx][txh_idx], col_num); - col_txfm_16x16_rounding(out, -shift[1]); - transpose_16x16(out, in); - fadst16x16_sse4_1(in, out, fwd_cos_bit_row[txw_idx][txh_idx], col_num); - transpose_16x16(out, in); - write_buffer_16x16(in, coeff); - break; - default: assert(0); - } - (void)bd; -} - -static INLINE void flip_buf_sse4_1(__m128i *in, __m128i *out, int size) { - for (int i = 0; i < size; i += 2) in[30 - i] = out[i]; - for (int i = 1; i < size; i += 2) in[size - i] = out[i]; -} - -static const fwd_transform_1d_sse4_1 col_highbd_txfm8x8_arr[TX_TYPES] = { - fdct8x8_sse4_1, // DCT_DCT - fadst8x8_sse4_1, // ADST_DCT - fdct8x8_sse4_1, // DCT_ADST - fadst8x8_sse4_1, // ADST_ADST - fadst8x8_sse4_1, // FLIPADST_DCT - fdct8x8_sse4_1, // DCT_FLIPADST - fadst8x8_sse4_1, // FLIPADST_FLIPADST - fadst8x8_sse4_1, // ADST_FLIPADST - fadst8x8_sse4_1, // FLIPADST_ADST - NULL, // IDTX - NULL, // V_DCT - NULL, // H_DCT - NULL, // V_ADST - NULL, // H_ADST - NULL, // V_FLIPADST - NULL // H_FLIPADST -}; - -static const fwd_transform_1d_sse4_1 row_highbd_txfm8x16_arr[TX_TYPES] = { - fdct16x16_sse4_1, // DCT_DCT - fdct16x16_sse4_1, // ADST_DCT - fadst16x16_sse4_1, // 
DCT_ADST - fadst16x16_sse4_1, // ADST_ADST - fdct16x16_sse4_1, // FLIPADST_DCT - fadst16x16_sse4_1, // DCT_FLIPADST - fadst16x16_sse4_1, // FLIPADST_FLIPADST - fadst16x16_sse4_1, // ADST_FLIPADST - fadst16x16_sse4_1, // FLIPADST_ADST - NULL, // IDTX - NULL, // V_DCT - NULL, // H_DCT - NULL, // V_ADST - NULL, // H_ADST - NULL, // V_FLIPADST - NULL // H_FLIPADST -}; - -static const fwd_transform_1d_sse4_1 col_highbd_txfm8x16_arr[TX_TYPES] = { - fdct16x16_sse4_1, // DCT_DCT - fadst16x16_sse4_1, // ADST_DCT - fdct16x16_sse4_1, // DCT_ADST - fadst16x16_sse4_1, // ADST_ADST - fadst16x16_sse4_1, // FLIPADST_DCT - fdct16x16_sse4_1, // DCT_FLIPADST - fadst16x16_sse4_1, // FLIPADST_FLIPADST - fadst16x16_sse4_1, // ADST_FLIPADST - fadst16x16_sse4_1, // FLIPADST_ADST - NULL, // IDTX - NULL, // V_DCT - NULL, // H_DCT - NULL, // V_ADST - NULL, // H_ADST - NULL, // V_FLIPADST - NULL // H_FLIPADST -}; -static const fwd_transform_1d_sse4_1 row_highbd_txfm8x8_arr[TX_TYPES] = { - fdct8x8_sse4_1, // DCT_DCT - fdct8x8_sse4_1, // ADST_DCT - fadst8x8_sse4_1, // DCT_ADST - fadst8x8_sse4_1, // ADST_ADST - fdct8x8_sse4_1, // FLIPADST_DCT - fadst8x8_sse4_1, // DCT_FLIPADST - fadst8x8_sse4_1, // FLIPADST_FLIPADST - fadst8x8_sse4_1, // ADST_FLIPADST - fadst8x8_sse4_1, // FLIPADST_ADST - NULL, // IDTX - NULL, // V_DCT - NULL, // H_DCT - NULL, // V_ADST - NULL, // H_ADST - NULL, // V_FLIPADST - NULL // H_FLIPADST -}; - -void av1_fwd_txfm2d_16x8_sse4_1(const int16_t *input, int32_t *coeff, - int stride, TX_TYPE tx_type, int bd) { - __m128i in[32], out[32]; - const int8_t *shift = fwd_txfm_shift_ls[TX_16X8]; - const int txw_idx = get_txw_idx(TX_16X8); - const int txh_idx = get_txh_idx(TX_16X8); - const fwd_transform_1d_sse4_1 col_txfm = col_highbd_txfm8x8_arr[tx_type]; - const fwd_transform_1d_sse4_1 row_txfm = row_highbd_txfm8x16_arr[tx_type]; - int bit = fwd_cos_bit_col[txw_idx][txh_idx]; - int ud_flip, lr_flip; - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - - for (int i = 0; i < 2; i++) { - 
load_buffer_8x8(input + i * 8, in, stride, ud_flip, 0, shift[0]); - col_txfm(in, in, bit, 0); - col_txfm_8x8_rounding(in, -shift[1]); - transpose_8x8(in, out + i * 16); - } - - if (lr_flip) { - flip_buf_sse4_1(in, out, 32); - row_txfm(in, out, bit, 2); - } else { - row_txfm(out, out, bit, 2); - } - - for (int i = 0; i < 2; i++) { - transpose_8x8(out + i * 16, in); - av1_round_shift_rect_array_32_sse4_1(in, in, 16, -shift[2], NewSqrt2); - write_buffer_16x8(in, coeff + i * 8, 16); - } - - (void)bd; -} - -void av1_fwd_txfm2d_8x16_sse4_1(const int16_t *input, int32_t *coeff, - int stride, TX_TYPE tx_type, int bd) { - __m128i in[32], out[32]; - const int8_t *shift = fwd_txfm_shift_ls[TX_8X16]; - const int txw_idx = get_txw_idx(TX_8X16); - const int txh_idx = get_txh_idx(TX_8X16); - const fwd_transform_1d_sse4_1 col_txfm = col_highbd_txfm8x16_arr[tx_type]; - const fwd_transform_1d_sse4_1 row_txfm = row_highbd_txfm8x8_arr[tx_type]; - int bit = fwd_cos_bit_col[txw_idx][txh_idx]; - int ud_flip, lr_flip; - get_flip_cfg(tx_type, &ud_flip, &lr_flip); - - load_buffer_8x16(input, in, stride, ud_flip, lr_flip, shift[0]); - col_txfm(in, in, bit, 2); - col_txfm_8x16_rounding(in, -shift[1]); - transpose_8x8(in, out); - transpose_8x8(in + 16, out + 16); - - for (int i = 0; i < 2; i++) { - row_txfm(out + i * 16, out, bit, 0); - transpose_8x8(out, in); - av1_round_shift_rect_array_32_sse4_1(in, in, 16, -shift[2], NewSqrt2); - write_buffer_8x8(in, coeff + i * 64); - } - - (void)bd; -} diff --git a/third_party/aom/av1/encoder/x86/pickrst_avx2.c b/third_party/aom/av1/encoder/x86/pickrst_avx2.c deleted file mode 100644 index 06aaaa7ee..000000000 --- a/third_party/aom/av1/encoder/x86/pickrst_avx2.c +++ /dev/null @@ -1,403 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. 
If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include // AVX2 -#include "aom_dsp/x86/synonyms.h" -#include "aom_dsp/x86/synonyms_avx2.h" -#include "aom_dsp/x86/transpose_sse2.h" - -#include "config/av1_rtcd.h" -#include "av1/common/restoration.h" -#include "av1/encoder/pickrst.h" - -static INLINE void acc_stat_avx2(int32_t *dst, const uint8_t *src, - const __m128i *shuffle, const __m256i *kl) { - const __m128i s = _mm_shuffle_epi8(xx_loadu_128(src), *shuffle); - const __m256i d0 = _mm256_madd_epi16(*kl, _mm256_cvtepu8_epi16(s)); - const __m256i dst0 = yy_loadu_256(dst); - const __m256i r0 = _mm256_add_epi32(dst0, d0); - yy_storeu_256(dst, r0); -} - -static INLINE void acc_stat_win7_one_line_avx2( - const uint8_t *dgd, const uint8_t *src, int h_start, int h_end, - int dgd_stride, const __m128i *shuffle, int32_t *sumX, - int32_t sumY[WIENER_WIN][WIENER_WIN], int32_t M_int[WIENER_WIN][WIENER_WIN], - int32_t H_int[WIENER_WIN2][WIENER_WIN * 8]) { - int j, k, l; - const int wiener_win = WIENER_WIN; - for (j = h_start; j < h_end; j += 2) { - const uint8_t X1 = src[j]; - const uint8_t X2 = src[j + 1]; - *sumX += X1 + X2; - const uint8_t *dgd_ij = dgd + j; - for (k = 0; k < wiener_win; k++) { - const uint8_t *dgd_ijk = dgd_ij + k * dgd_stride; - for (l = 0; l < wiener_win; l++) { - int32_t *H_ = &H_int[(l * wiener_win + k)][0]; - const uint8_t D1 = dgd_ijk[l]; - const uint8_t D2 = dgd_ijk[l + 1]; - sumY[k][l] += D1 + D2; - M_int[k][l] += D1 * X1 + D2 * X2; - - const __m256i kl = - _mm256_cvtepu8_epi16(_mm_set1_epi16(*((uint16_t *)(dgd_ijk + l)))); - acc_stat_avx2(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle, &kl); - acc_stat_avx2(H_ + 1 * 8, dgd_ij + 1 * dgd_stride, shuffle, &kl); - 
acc_stat_avx2(H_ + 2 * 8, dgd_ij + 2 * dgd_stride, shuffle, &kl); - acc_stat_avx2(H_ + 3 * 8, dgd_ij + 3 * dgd_stride, shuffle, &kl); - acc_stat_avx2(H_ + 4 * 8, dgd_ij + 4 * dgd_stride, shuffle, &kl); - acc_stat_avx2(H_ + 5 * 8, dgd_ij + 5 * dgd_stride, shuffle, &kl); - acc_stat_avx2(H_ + 6 * 8, dgd_ij + 6 * dgd_stride, shuffle, &kl); - } - } - } -} - -static INLINE void compute_stats_win7_opt_avx2( - const uint8_t *dgd, const uint8_t *src, int h_start, int h_end, int v_start, - int v_end, int dgd_stride, int src_stride, double *M, double *H) { - int i, j, k, l, m, n; - const int wiener_win = WIENER_WIN; - const int pixel_count = (h_end - h_start) * (v_end - v_start); - const int wiener_win2 = wiener_win * wiener_win; - const int wiener_halfwin = (wiener_win >> 1); - const double avg = - find_average(dgd, h_start, h_end, v_start, v_end, dgd_stride); - - int32_t M_int32[WIENER_WIN][WIENER_WIN] = { { 0 } }; - int64_t M_int64[WIENER_WIN][WIENER_WIN] = { { 0 } }; - int32_t H_int32[WIENER_WIN2][WIENER_WIN * 8] = { { 0 } }; - int64_t H_int64[WIENER_WIN2][WIENER_WIN * 8] = { { 0 } }; - int32_t sumY[WIENER_WIN][WIENER_WIN] = { { 0 } }; - int32_t sumX = 0; - const uint8_t *dgd_win = dgd - wiener_halfwin * dgd_stride - wiener_halfwin; - - const __m128i shuffle = xx_loadu_128(g_shuffle_stats_data); - for (j = v_start; j < v_end; j += 64) { - const int vert_end = AOMMIN(64, v_end - j) + j; - for (i = j; i < vert_end; i++) { - acc_stat_win7_one_line_avx2( - dgd_win + i * dgd_stride, src + i * src_stride, h_start, h_end, - dgd_stride, &shuffle, &sumX, sumY, M_int32, H_int32); - } - for (k = 0; k < wiener_win; ++k) { - for (l = 0; l < wiener_win; ++l) { - M_int64[k][l] += M_int32[k][l]; - M_int32[k][l] = 0; - } - } - for (k = 0; k < WIENER_WIN2; ++k) { - for (l = 0; l < WIENER_WIN * 8; ++l) { - H_int64[k][l] += H_int32[k][l]; - H_int32[k][l] = 0; - } - } - } - - const double avg_square_sum = avg * avg * pixel_count; - for (k = 0; k < wiener_win; k++) { - for (l = 0; l < 
wiener_win; l++) { - const int32_t idx0 = l * wiener_win + k; - M[idx0] = M_int64[k][l] + avg_square_sum - avg * (sumX + sumY[k][l]); - double *H_ = H + idx0 * wiener_win2; - int64_t *H_int_ = &H_int64[idx0][0]; - for (m = 0; m < wiener_win; m++) { - for (n = 0; n < wiener_win; n++) { - H_[m * wiener_win + n] = H_int_[n * 8 + m] + avg_square_sum - - avg * (sumY[k][l] + sumY[n][m]); - } - } - } - } -} - -static INLINE void acc_stat_win5_one_line_avx2( - const uint8_t *dgd, const uint8_t *src, int h_start, int h_end, - int dgd_stride, const __m128i *shuffle, int32_t *sumX, - int32_t sumY[WIENER_WIN_CHROMA][WIENER_WIN_CHROMA], - int32_t M_int[WIENER_WIN_CHROMA][WIENER_WIN_CHROMA], - int32_t H_int[WIENER_WIN2_CHROMA][WIENER_WIN_CHROMA * 8]) { - int j, k, l; - const int wiener_win = WIENER_WIN_CHROMA; - for (j = h_start; j < h_end; j += 2) { - const uint8_t X1 = src[j]; - const uint8_t X2 = src[j + 1]; - *sumX += X1 + X2; - const uint8_t *dgd_ij = dgd + j; - for (k = 0; k < wiener_win; k++) { - const uint8_t *dgd_ijk = dgd_ij + k * dgd_stride; - for (l = 0; l < wiener_win; l++) { - int32_t *H_ = &H_int[(l * wiener_win + k)][0]; - const uint8_t D1 = dgd_ijk[l]; - const uint8_t D2 = dgd_ijk[l + 1]; - sumY[k][l] += D1 + D2; - M_int[k][l] += D1 * X1 + D2 * X2; - - const __m256i kl = - _mm256_cvtepu8_epi16(_mm_set1_epi16(*((uint16_t *)(dgd_ijk + l)))); - acc_stat_avx2(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle, &kl); - acc_stat_avx2(H_ + 1 * 8, dgd_ij + 1 * dgd_stride, shuffle, &kl); - acc_stat_avx2(H_ + 2 * 8, dgd_ij + 2 * dgd_stride, shuffle, &kl); - acc_stat_avx2(H_ + 3 * 8, dgd_ij + 3 * dgd_stride, shuffle, &kl); - acc_stat_avx2(H_ + 4 * 8, dgd_ij + 4 * dgd_stride, shuffle, &kl); - } - } - } -} - -static INLINE void compute_stats_win5_opt_avx2( - const uint8_t *dgd, const uint8_t *src, int h_start, int h_end, int v_start, - int v_end, int dgd_stride, int src_stride, double *M, double *H) { - int i, j, k, l, m, n; - const int wiener_win = WIENER_WIN_CHROMA; - const int 
pixel_count = (h_end - h_start) * (v_end - v_start); - const int wiener_win2 = wiener_win * wiener_win; - const int wiener_halfwin = (wiener_win >> 1); - const double avg = - find_average(dgd, h_start, h_end, v_start, v_end, dgd_stride); - - int32_t M_int32[WIENER_WIN_CHROMA][WIENER_WIN_CHROMA] = { { 0 } }; - int64_t M_int64[WIENER_WIN_CHROMA][WIENER_WIN_CHROMA] = { { 0 } }; - int32_t H_int32[WIENER_WIN2_CHROMA][WIENER_WIN_CHROMA * 8] = { { 0 } }; - int64_t H_int64[WIENER_WIN2_CHROMA][WIENER_WIN_CHROMA * 8] = { { 0 } }; - int32_t sumY[WIENER_WIN_CHROMA][WIENER_WIN_CHROMA] = { { 0 } }; - int32_t sumX = 0; - const uint8_t *dgd_win = dgd - wiener_halfwin * dgd_stride - wiener_halfwin; - - const __m128i shuffle = xx_loadu_128(g_shuffle_stats_data); - for (j = v_start; j < v_end; j += 64) { - const int vert_end = AOMMIN(64, v_end - j) + j; - for (i = j; i < vert_end; i++) { - acc_stat_win5_one_line_avx2( - dgd_win + i * dgd_stride, src + i * src_stride, h_start, h_end, - dgd_stride, &shuffle, &sumX, sumY, M_int32, H_int32); - } - for (k = 0; k < wiener_win; ++k) { - for (l = 0; l < wiener_win; ++l) { - M_int64[k][l] += M_int32[k][l]; - M_int32[k][l] = 0; - } - } - for (k = 0; k < WIENER_WIN2_CHROMA; ++k) { - for (l = 0; l < WIENER_WIN_CHROMA * 8; ++l) { - H_int64[k][l] += H_int32[k][l]; - H_int32[k][l] = 0; - } - } - } - - const double avg_square_sum = avg * avg * pixel_count; - for (k = 0; k < wiener_win; k++) { - for (l = 0; l < wiener_win; l++) { - const int32_t idx0 = l * wiener_win + k; - M[idx0] = M_int64[k][l] + avg_square_sum - avg * (sumX + sumY[k][l]); - double *H_ = H + idx0 * wiener_win2; - int64_t *H_int_ = &H_int64[idx0][0]; - for (m = 0; m < wiener_win; m++) { - for (n = 0; n < wiener_win; n++) { - H_[m * wiener_win + n] = H_int_[n * 8 + m] + avg_square_sum - - avg * (sumY[k][l] + sumY[n][m]); - } - } - } - } -} - -void av1_compute_stats_avx2(int wiener_win, const uint8_t *dgd, - const uint8_t *src, int h_start, int h_end, - int v_start, int v_end, int 
dgd_stride, - int src_stride, double *M, double *H) { - if (wiener_win == WIENER_WIN) { - compute_stats_win7_opt_avx2(dgd, src, h_start, h_end, v_start, v_end, - dgd_stride, src_stride, M, H); - } else if (wiener_win == WIENER_WIN_CHROMA) { - compute_stats_win5_opt_avx2(dgd, src, h_start, h_end, v_start, v_end, - dgd_stride, src_stride, M, H); - } else { - av1_compute_stats_c(wiener_win, dgd, src, h_start, h_end, v_start, v_end, - dgd_stride, src_stride, M, H); - } -} - -static INLINE __m256i pair_set_epi16(uint16_t a, uint16_t b) { - return _mm256_set1_epi32( - (int32_t)(((uint16_t)(a)) | (((uint32_t)(b)) << 16))); -} - -int64_t av1_lowbd_pixel_proj_error_avx2( - const uint8_t *src8, int width, int height, int src_stride, - const uint8_t *dat8, int dat_stride, int32_t *flt0, int flt0_stride, - int32_t *flt1, int flt1_stride, int xq[2], const sgr_params_type *params) { - int i, j, k; - const int32_t shift = SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS; - const __m256i rounding = _mm256_set1_epi32(1 << (shift - 1)); - __m256i sum64 = _mm256_setzero_si256(); - const uint8_t *src = src8; - const uint8_t *dat = dat8; - int64_t err = 0; - if (params->r[0] > 0 && params->r[1] > 0) { - __m256i xq_coeff = pair_set_epi16(xq[0], xq[1]); - for (i = 0; i < height; ++i) { - __m256i sum32 = _mm256_setzero_si256(); - for (j = 0; j <= width - 16; j += 16) { - const __m256i d0 = _mm256_cvtepu8_epi16(xx_loadu_128(dat + j)); - const __m256i s0 = _mm256_cvtepu8_epi16(xx_loadu_128(src + j)); - const __m256i flt0_16b = _mm256_permute4x64_epi64( - _mm256_packs_epi32(yy_loadu_256(flt0 + j), - yy_loadu_256(flt0 + j + 8)), - 0xd8); - const __m256i flt1_16b = _mm256_permute4x64_epi64( - _mm256_packs_epi32(yy_loadu_256(flt1 + j), - yy_loadu_256(flt1 + j + 8)), - 0xd8); - const __m256i u0 = _mm256_slli_epi16(d0, SGRPROJ_RST_BITS); - const __m256i flt0_0_sub_u = _mm256_sub_epi16(flt0_16b, u0); - const __m256i flt1_0_sub_u = _mm256_sub_epi16(flt1_16b, u0); - const __m256i v0 = _mm256_madd_epi16( - 
xq_coeff, _mm256_unpacklo_epi16(flt0_0_sub_u, flt1_0_sub_u)); - const __m256i v1 = _mm256_madd_epi16( - xq_coeff, _mm256_unpackhi_epi16(flt0_0_sub_u, flt1_0_sub_u)); - const __m256i vr0 = - _mm256_srai_epi32(_mm256_add_epi32(v0, rounding), shift); - const __m256i vr1 = - _mm256_srai_epi32(_mm256_add_epi32(v1, rounding), shift); - const __m256i e0 = _mm256_sub_epi16( - _mm256_add_epi16(_mm256_packs_epi32(vr0, vr1), d0), s0); - const __m256i err0 = _mm256_madd_epi16(e0, e0); - sum32 = _mm256_add_epi32(sum32, err0); - } - for (k = j; k < width; ++k) { - const int32_t u = (int32_t)(dat[k] << SGRPROJ_RST_BITS); - int32_t v = xq[0] * (flt0[k] - u) + xq[1] * (flt1[k] - u); - const int32_t e = ROUND_POWER_OF_TWO(v, shift) + dat[k] - src[k]; - err += e * e; - } - dat += dat_stride; - src += src_stride; - flt0 += flt0_stride; - flt1 += flt1_stride; - const __m256i sum64_0 = - _mm256_cvtepi32_epi64(_mm256_castsi256_si128(sum32)); - const __m256i sum64_1 = - _mm256_cvtepi32_epi64(_mm256_extracti128_si256(sum32, 1)); - sum64 = _mm256_add_epi64(sum64, sum64_0); - sum64 = _mm256_add_epi64(sum64, sum64_1); - } - } else if (params->r[0] > 0) { - __m256i xq_coeff = - pair_set_epi16(xq[0], (-xq[0] * (1 << SGRPROJ_RST_BITS))); - for (i = 0; i < height; ++i) { - __m256i sum32 = _mm256_setzero_si256(); - for (j = 0; j <= width - 16; j += 16) { - const __m256i d0 = _mm256_cvtepu8_epi16(xx_loadu_128(dat + j)); - const __m256i s0 = _mm256_cvtepu8_epi16(xx_loadu_128(src + j)); - const __m256i flt0_16b = _mm256_permute4x64_epi64( - _mm256_packs_epi32(yy_loadu_256(flt0 + j), - yy_loadu_256(flt0 + j + 8)), - 0xd8); - const __m256i v0 = - _mm256_madd_epi16(xq_coeff, _mm256_unpacklo_epi16(flt0_16b, d0)); - const __m256i v1 = - _mm256_madd_epi16(xq_coeff, _mm256_unpackhi_epi16(flt0_16b, d0)); - const __m256i vr0 = - _mm256_srai_epi32(_mm256_add_epi32(v0, rounding), shift); - const __m256i vr1 = - _mm256_srai_epi32(_mm256_add_epi32(v1, rounding), shift); - const __m256i e0 = _mm256_sub_epi16( - 
_mm256_add_epi16(_mm256_packs_epi32(vr0, vr1), d0), s0); - const __m256i err0 = _mm256_madd_epi16(e0, e0); - sum32 = _mm256_add_epi32(sum32, err0); - } - for (k = j; k < width; ++k) { - const int32_t u = (int32_t)(dat[k] << SGRPROJ_RST_BITS); - int32_t v = xq[0] * (flt0[k] - u); - const int32_t e = ROUND_POWER_OF_TWO(v, shift) + dat[k] - src[k]; - err += e * e; - } - dat += dat_stride; - src += src_stride; - flt0 += flt0_stride; - const __m256i sum64_0 = - _mm256_cvtepi32_epi64(_mm256_castsi256_si128(sum32)); - const __m256i sum64_1 = - _mm256_cvtepi32_epi64(_mm256_extracti128_si256(sum32, 1)); - sum64 = _mm256_add_epi64(sum64, sum64_0); - sum64 = _mm256_add_epi64(sum64, sum64_1); - } - } else if (params->r[1] > 0) { - __m256i xq_coeff = pair_set_epi16(xq[1], -(xq[1] << SGRPROJ_RST_BITS)); - for (i = 0; i < height; ++i) { - __m256i sum32 = _mm256_setzero_si256(); - for (j = 0; j <= width - 16; j += 16) { - const __m256i d0 = _mm256_cvtepu8_epi16(xx_loadu_128(dat + j)); - const __m256i s0 = _mm256_cvtepu8_epi16(xx_loadu_128(src + j)); - const __m256i flt1_16b = _mm256_permute4x64_epi64( - _mm256_packs_epi32(yy_loadu_256(flt1 + j), - yy_loadu_256(flt1 + j + 8)), - 0xd8); - const __m256i v0 = - _mm256_madd_epi16(xq_coeff, _mm256_unpacklo_epi16(flt1_16b, d0)); - const __m256i v1 = - _mm256_madd_epi16(xq_coeff, _mm256_unpackhi_epi16(flt1_16b, d0)); - const __m256i vr0 = - _mm256_srai_epi32(_mm256_add_epi32(v0, rounding), shift); - const __m256i vr1 = - _mm256_srai_epi32(_mm256_add_epi32(v1, rounding), shift); - const __m256i e0 = _mm256_sub_epi16( - _mm256_add_epi16(_mm256_packs_epi32(vr0, vr1), d0), s0); - const __m256i err0 = _mm256_madd_epi16(e0, e0); - sum32 = _mm256_add_epi32(sum32, err0); - } - for (k = j; k < width; ++k) { - const int32_t u = (int32_t)(dat[k] << SGRPROJ_RST_BITS); - int32_t v = xq[1] * (flt1[k] - u); - const int32_t e = ROUND_POWER_OF_TWO(v, shift) + dat[k] - src[k]; - err += e * e; - } - dat += dat_stride; - src += src_stride; - flt1 += 
flt1_stride; - const __m256i sum64_0 = - _mm256_cvtepi32_epi64(_mm256_castsi256_si128(sum32)); - const __m256i sum64_1 = - _mm256_cvtepi32_epi64(_mm256_extracti128_si256(sum32, 1)); - sum64 = _mm256_add_epi64(sum64, sum64_0); - sum64 = _mm256_add_epi64(sum64, sum64_1); - } - } else { - __m256i sum32 = _mm256_setzero_si256(); - for (i = 0; i < height; ++i) { - for (j = 0; j <= width - 16; j += 16) { - const __m256i d0 = _mm256_cvtepu8_epi16(xx_loadu_128(dat + j)); - const __m256i s0 = _mm256_cvtepu8_epi16(xx_loadu_128(src + j)); - const __m256i diff0 = _mm256_sub_epi16(d0, s0); - const __m256i err0 = _mm256_madd_epi16(diff0, diff0); - sum32 = _mm256_add_epi32(sum32, err0); - } - for (k = j; k < width; ++k) { - const int32_t e = (int32_t)(dat[k]) - src[k]; - err += e * e; - } - dat += dat_stride; - src += src_stride; - } - const __m256i sum64_0 = - _mm256_cvtepi32_epi64(_mm256_castsi256_si128(sum32)); - const __m256i sum64_1 = - _mm256_cvtepi32_epi64(_mm256_extracti128_si256(sum32, 1)); - sum64 = _mm256_add_epi64(sum64_0, sum64_1); - } - int64_t sum[4]; - yy_storeu_256(sum, sum64); - err += sum[0] + sum[1] + sum[2] + sum[3]; - return err; -} diff --git a/third_party/aom/av1/encoder/x86/pickrst_sse4.c b/third_party/aom/av1/encoder/x86/pickrst_sse4.c deleted file mode 100644 index 04e4d1afc..000000000 --- a/third_party/aom/av1/encoder/x86/pickrst_sse4.c +++ /dev/null @@ -1,389 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include -#include -#include "aom_dsp/x86/synonyms.h" - -#include "config/av1_rtcd.h" -#include "av1/common/restoration.h" -#include "av1/encoder/pickrst.h" - -static INLINE void acc_stat_sse41(int32_t *dst, const uint8_t *src, - const __m128i *shuffle, const __m128i *kl) { - const __m128i s = _mm_shuffle_epi8(xx_loadu_128(src), *shuffle); - const __m128i d0 = _mm_madd_epi16(*kl, _mm_cvtepu8_epi16(s)); - const __m128i d1 = - _mm_madd_epi16(*kl, _mm_cvtepu8_epi16(_mm_srli_si128(s, 8))); - const __m128i dst0 = xx_loadu_128(dst); - const __m128i dst1 = xx_loadu_128(dst + 4); - const __m128i r0 = _mm_add_epi32(dst0, d0); - const __m128i r1 = _mm_add_epi32(dst1, d1); - xx_storeu_128(dst, r0); - xx_storeu_128(dst + 4, r1); -} - -static INLINE void acc_stat_win7_one_line_sse4_1( - const uint8_t *dgd, const uint8_t *src, int h_start, int h_end, - int dgd_stride, const __m128i *shuffle, int32_t *sumX, - int32_t sumY[WIENER_WIN][WIENER_WIN], int32_t M_int[WIENER_WIN][WIENER_WIN], - int32_t H_int[WIENER_WIN2][WIENER_WIN * 8]) { - const int wiener_win = 7; - int j, k, l; - for (j = h_start; j < h_end; j += 2) { - const uint8_t *dgd_ij = dgd + j; - const uint8_t X1 = src[j]; - const uint8_t X2 = src[j + 1]; - *sumX += X1 + X2; - for (k = 0; k < wiener_win; k++) { - const uint8_t *dgd_ijk = dgd_ij + k * dgd_stride; - for (l = 0; l < wiener_win; l++) { - int32_t *H_ = &H_int[(l * wiener_win + k)][0]; - const uint8_t D1 = dgd_ijk[l]; - const uint8_t D2 = dgd_ijk[l + 1]; - sumY[k][l] += D1 + D2; - M_int[k][l] += D1 * X1 + D2 * X2; - - const __m128i kl = - _mm_cvtepu8_epi16(_mm_set1_epi16(*((uint16_t *)(dgd_ijk + l)))); - acc_stat_sse41(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle, &kl); - acc_stat_sse41(H_ + 1 * 8, dgd_ij + 1 * dgd_stride, shuffle, &kl); - acc_stat_sse41(H_ + 2 * 8, dgd_ij + 2 * dgd_stride, shuffle, &kl); - acc_stat_sse41(H_ + 3 * 8, dgd_ij + 3 * dgd_stride, shuffle, &kl); - acc_stat_sse41(H_ + 4 * 8, dgd_ij + 4 * dgd_stride, shuffle, &kl); - 
acc_stat_sse41(H_ + 5 * 8, dgd_ij + 5 * dgd_stride, shuffle, &kl); - acc_stat_sse41(H_ + 6 * 8, dgd_ij + 6 * dgd_stride, shuffle, &kl); - } - } - } -} - -static INLINE void compute_stats_win7_opt_sse4_1( - const uint8_t *dgd, const uint8_t *src, int h_start, int h_end, int v_start, - int v_end, int dgd_stride, int src_stride, double *M, double *H) { - int i, j, k, l, m, n; - const int wiener_win = WIENER_WIN; - const int pixel_count = (h_end - h_start) * (v_end - v_start); - const int wiener_win2 = wiener_win * wiener_win; - const int wiener_halfwin = (wiener_win >> 1); - const double avg = - find_average(dgd, h_start, h_end, v_start, v_end, dgd_stride); - - int32_t M_int32[WIENER_WIN][WIENER_WIN] = { { 0 } }; - int64_t M_int64[WIENER_WIN][WIENER_WIN] = { { 0 } }; - int32_t H_int32[WIENER_WIN2][WIENER_WIN * 8] = { { 0 } }; - int64_t H_int64[WIENER_WIN2][WIENER_WIN * 8] = { { 0 } }; - int32_t sumY[WIENER_WIN][WIENER_WIN] = { { 0 } }; - int32_t sumX = 0; - const uint8_t *dgd_win = dgd - wiener_halfwin * dgd_stride - wiener_halfwin; - - const __m128i shuffle = xx_loadu_128(g_shuffle_stats_data); - for (j = v_start; j < v_end; j += 64) { - const int vert_end = AOMMIN(64, v_end - j) + j; - for (i = j; i < vert_end; i++) { - acc_stat_win7_one_line_sse4_1( - dgd_win + i * dgd_stride, src + i * src_stride, h_start, h_end, - dgd_stride, &shuffle, &sumX, sumY, M_int32, H_int32); - } - for (k = 0; k < wiener_win; ++k) { - for (l = 0; l < wiener_win; ++l) { - M_int64[k][l] += M_int32[k][l]; - M_int32[k][l] = 0; - } - } - for (k = 0; k < WIENER_WIN2; ++k) { - for (l = 0; l < WIENER_WIN * 8; ++l) { - H_int64[k][l] += H_int32[k][l]; - H_int32[k][l] = 0; - } - } - } - - const double avg_square_sum = avg * avg * pixel_count; - for (k = 0; k < wiener_win; k++) { - for (l = 0; l < wiener_win; l++) { - const int32_t idx0 = l * wiener_win + k; - M[idx0] = M_int64[k][l] + avg_square_sum - avg * (sumX + sumY[k][l]); - double *H_ = H + idx0 * wiener_win2; - int64_t *H_int_ = 
&H_int64[idx0][0]; - for (m = 0; m < wiener_win; m++) { - for (n = 0; n < wiener_win; n++) { - H_[m * wiener_win + n] = H_int_[n * 8 + m] + avg_square_sum - - avg * (sumY[k][l] + sumY[n][m]); - } - } - } - } -} - -static INLINE void acc_stat_win5_one_line_sse4_1( - const uint8_t *dgd, const uint8_t *src, int h_start, int h_end, - int dgd_stride, const __m128i *shuffle, int32_t *sumX, - int32_t sumY[WIENER_WIN_CHROMA][WIENER_WIN_CHROMA], - int32_t M_int[WIENER_WIN_CHROMA][WIENER_WIN_CHROMA], - int32_t H_int[WIENER_WIN2_CHROMA][WIENER_WIN_CHROMA * 8]) { - const int wiener_win = WIENER_WIN_CHROMA; - int j, k, l; - for (j = h_start; j < h_end; j += 2) { - const uint8_t *dgd_ij = dgd + j; - const uint8_t X1 = src[j]; - const uint8_t X2 = src[j + 1]; - *sumX += X1 + X2; - for (k = 0; k < wiener_win; k++) { - const uint8_t *dgd_ijk = dgd_ij + k * dgd_stride; - for (l = 0; l < wiener_win; l++) { - int32_t *H_ = &H_int[(l * wiener_win + k)][0]; - const uint8_t D1 = dgd_ijk[l]; - const uint8_t D2 = dgd_ijk[l + 1]; - sumY[k][l] += D1 + D2; - M_int[k][l] += D1 * X1 + D2 * X2; - - const __m128i kl = - _mm_cvtepu8_epi16(_mm_set1_epi16(*((uint16_t *)(dgd_ijk + l)))); - acc_stat_sse41(H_ + 0 * 8, dgd_ij + 0 * dgd_stride, shuffle, &kl); - acc_stat_sse41(H_ + 1 * 8, dgd_ij + 1 * dgd_stride, shuffle, &kl); - acc_stat_sse41(H_ + 2 * 8, dgd_ij + 2 * dgd_stride, shuffle, &kl); - acc_stat_sse41(H_ + 3 * 8, dgd_ij + 3 * dgd_stride, shuffle, &kl); - acc_stat_sse41(H_ + 4 * 8, dgd_ij + 4 * dgd_stride, shuffle, &kl); - } - } - } -} - -static INLINE void compute_stats_win5_opt_sse4_1( - const uint8_t *dgd, const uint8_t *src, int h_start, int h_end, int v_start, - int v_end, int dgd_stride, int src_stride, double *M, double *H) { - int i, j, k, l, m, n; - const int wiener_win = WIENER_WIN_CHROMA; - const int pixel_count = (h_end - h_start) * (v_end - v_start); - const int wiener_win2 = wiener_win * wiener_win; - const int wiener_halfwin = (wiener_win >> 1); - const double avg = - 
find_average(dgd, h_start, h_end, v_start, v_end, dgd_stride); - - int32_t M_int32[WIENER_WIN_CHROMA][WIENER_WIN_CHROMA] = { { 0 } }; - int64_t M_int64[WIENER_WIN_CHROMA][WIENER_WIN_CHROMA] = { { 0 } }; - int32_t H_int32[WIENER_WIN2_CHROMA][WIENER_WIN_CHROMA * 8] = { { 0 } }; - int64_t H_int64[WIENER_WIN2_CHROMA][WIENER_WIN_CHROMA * 8] = { { 0 } }; - int32_t sumY[WIENER_WIN_CHROMA][WIENER_WIN_CHROMA] = { { 0 } }; - int32_t sumX = 0; - const uint8_t *dgd_win = dgd - wiener_halfwin * dgd_stride - wiener_halfwin; - - const __m128i shuffle = xx_loadu_128(g_shuffle_stats_data); - for (j = v_start; j < v_end; j += 64) { - const int vert_end = AOMMIN(64, v_end - j) + j; - for (i = j; i < vert_end; i++) { - acc_stat_win5_one_line_sse4_1( - dgd_win + i * dgd_stride, src + i * src_stride, h_start, h_end, - dgd_stride, &shuffle, &sumX, sumY, M_int32, H_int32); - } - for (k = 0; k < wiener_win; ++k) { - for (l = 0; l < wiener_win; ++l) { - M_int64[k][l] += M_int32[k][l]; - M_int32[k][l] = 0; - } - } - for (k = 0; k < WIENER_WIN_CHROMA * WIENER_WIN_CHROMA; ++k) { - for (l = 0; l < WIENER_WIN_CHROMA * 8; ++l) { - H_int64[k][l] += H_int32[k][l]; - H_int32[k][l] = 0; - } - } - } - - const double avg_square_sum = avg * avg * pixel_count; - for (k = 0; k < wiener_win; k++) { - for (l = 0; l < wiener_win; l++) { - const int32_t idx0 = l * wiener_win + k; - M[idx0] = M_int64[k][l] + avg_square_sum - avg * (sumX + sumY[k][l]); - double *H_ = H + idx0 * wiener_win2; - int64_t *H_int_ = &H_int64[idx0][0]; - for (m = 0; m < wiener_win; m++) { - for (n = 0; n < wiener_win; n++) { - H_[m * wiener_win + n] = H_int_[n * 8 + m] + avg_square_sum - - avg * (sumY[k][l] + sumY[n][m]); - } - } - } - } -} -void av1_compute_stats_sse4_1(int wiener_win, const uint8_t *dgd, - const uint8_t *src, int h_start, int h_end, - int v_start, int v_end, int dgd_stride, - int src_stride, double *M, double *H) { - if (wiener_win == WIENER_WIN) { - compute_stats_win7_opt_sse4_1(dgd, src, h_start, h_end, v_start, 
v_end, - dgd_stride, src_stride, M, H); - } else if (wiener_win == WIENER_WIN_CHROMA) { - compute_stats_win5_opt_sse4_1(dgd, src, h_start, h_end, v_start, v_end, - dgd_stride, src_stride, M, H); - } else { - av1_compute_stats_c(wiener_win, dgd, src, h_start, h_end, v_start, v_end, - dgd_stride, src_stride, M, H); - } -} - -static INLINE __m128i pair_set_epi16(uint16_t a, uint16_t b) { - return _mm_set1_epi32((int32_t)(((uint16_t)(a)) | (((uint32_t)(b)) << 16))); -} - -int64_t av1_lowbd_pixel_proj_error_sse4_1( - const uint8_t *src8, int width, int height, int src_stride, - const uint8_t *dat8, int dat_stride, int32_t *flt0, int flt0_stride, - int32_t *flt1, int flt1_stride, int xq[2], const sgr_params_type *params) { - int i, j, k; - const int32_t shift = SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS; - const __m128i rounding = _mm_set1_epi32(1 << (shift - 1)); - __m128i sum64 = _mm_setzero_si128(); - const uint8_t *src = src8; - const uint8_t *dat = dat8; - int64_t err = 0; - if (params->r[0] > 0 && params->r[1] > 0) { - __m128i xq_coeff = pair_set_epi16(xq[0], xq[1]); - for (i = 0; i < height; ++i) { - __m128i sum32 = _mm_setzero_si128(); - for (j = 0; j < width - 8; j += 8) { - const __m128i d0 = _mm_cvtepu8_epi16(xx_loadl_64(dat + j)); - const __m128i s0 = _mm_cvtepu8_epi16(xx_loadl_64(src + j)); - const __m128i flt0_16b = - _mm_packs_epi32(xx_loadu_128(flt0 + j), xx_loadu_128(flt0 + j + 4)); - const __m128i flt1_16b = - _mm_packs_epi32(xx_loadu_128(flt1 + j), xx_loadu_128(flt1 + j + 4)); - const __m128i u0 = _mm_slli_epi16(d0, SGRPROJ_RST_BITS); - const __m128i flt0_0_sub_u = _mm_sub_epi16(flt0_16b, u0); - const __m128i flt1_0_sub_u = _mm_sub_epi16(flt1_16b, u0); - const __m128i v0 = _mm_madd_epi16( - xq_coeff, _mm_unpacklo_epi16(flt0_0_sub_u, flt1_0_sub_u)); - const __m128i v1 = _mm_madd_epi16( - xq_coeff, _mm_unpackhi_epi16(flt0_0_sub_u, flt1_0_sub_u)); - const __m128i vr0 = _mm_srai_epi32(_mm_add_epi32(v0, rounding), shift); - const __m128i vr1 = 
_mm_srai_epi32(_mm_add_epi32(v1, rounding), shift); - const __m128i e0 = - _mm_sub_epi16(_mm_add_epi16(_mm_packs_epi32(vr0, vr1), d0), s0); - const __m128i err0 = _mm_madd_epi16(e0, e0); - sum32 = _mm_add_epi32(sum32, err0); - } - for (k = j; k < width; ++k) { - const int32_t u = (int32_t)(dat[k] << SGRPROJ_RST_BITS); - int32_t v = xq[0] * (flt0[k] - u) + xq[1] * (flt1[k] - u); - const int32_t e = ROUND_POWER_OF_TWO(v, shift) + dat[k] - src[k]; - err += e * e; - } - dat += dat_stride; - src += src_stride; - flt0 += flt0_stride; - flt1 += flt1_stride; - const __m128i sum64_0 = _mm_cvtepi32_epi64(sum32); - const __m128i sum64_1 = _mm_cvtepi32_epi64(_mm_srli_si128(sum32, 8)); - sum64 = _mm_add_epi64(sum64, sum64_0); - sum64 = _mm_add_epi64(sum64, sum64_1); - } - } else if (params->r[0] > 0) { - __m128i xq_coeff = pair_set_epi16(xq[0], -(xq[0] << SGRPROJ_RST_BITS)); - for (i = 0; i < height; ++i) { - __m128i sum32 = _mm_setzero_si128(); - for (j = 0; j < width - 8; j += 8) { - const __m128i d0 = _mm_cvtepu8_epi16(xx_loadl_64(dat + j)); - const __m128i s0 = _mm_cvtepu8_epi16(xx_loadl_64(src + j)); - const __m128i flt0_16b = - _mm_packs_epi32(xx_loadu_128(flt0 + j), xx_loadu_128(flt0 + j + 4)); - const __m128i v0 = - _mm_madd_epi16(xq_coeff, _mm_unpacklo_epi16(flt0_16b, d0)); - const __m128i v1 = - _mm_madd_epi16(xq_coeff, _mm_unpackhi_epi16(flt0_16b, d0)); - const __m128i vr0 = _mm_srai_epi32(_mm_add_epi32(v0, rounding), shift); - const __m128i vr1 = _mm_srai_epi32(_mm_add_epi32(v1, rounding), shift); - const __m128i e0 = - _mm_sub_epi16(_mm_add_epi16(_mm_packs_epi32(vr0, vr1), d0), s0); - const __m128i err0 = _mm_madd_epi16(e0, e0); - sum32 = _mm_add_epi32(sum32, err0); - } - for (k = j; k < width; ++k) { - const int32_t u = (int32_t)(dat[k] << SGRPROJ_RST_BITS); - int32_t v = xq[0] * (flt0[k] - u); - const int32_t e = ROUND_POWER_OF_TWO(v, shift) + dat[k] - src[k]; - err += e * e; - } - dat += dat_stride; - src += src_stride; - flt0 += flt0_stride; - const __m128i 
sum64_0 = _mm_cvtepi32_epi64(sum32); - const __m128i sum64_1 = _mm_cvtepi32_epi64(_mm_srli_si128(sum32, 8)); - sum64 = _mm_add_epi64(sum64, sum64_0); - sum64 = _mm_add_epi64(sum64, sum64_1); - } - } else if (params->r[1] > 0) { - __m128i xq_coeff = pair_set_epi16(xq[1], -(xq[1] << SGRPROJ_RST_BITS)); - for (i = 0; i < height; ++i) { - __m128i sum32 = _mm_setzero_si128(); - for (j = 0; j < width - 8; j += 8) { - const __m128i d0 = _mm_cvtepu8_epi16(xx_loadl_64(dat + j)); - const __m128i s0 = _mm_cvtepu8_epi16(xx_loadl_64(src + j)); - const __m128i flt1_16b = - _mm_packs_epi32(xx_loadu_128(flt1 + j), xx_loadu_128(flt1 + j + 4)); - const __m128i v0 = - _mm_madd_epi16(xq_coeff, _mm_unpacklo_epi16(flt1_16b, d0)); - const __m128i v1 = - _mm_madd_epi16(xq_coeff, _mm_unpackhi_epi16(flt1_16b, d0)); - const __m128i vr0 = _mm_srai_epi32(_mm_add_epi32(v0, rounding), shift); - const __m128i vr1 = _mm_srai_epi32(_mm_add_epi32(v1, rounding), shift); - const __m128i e0 = - _mm_sub_epi16(_mm_add_epi16(_mm_packs_epi32(vr0, vr1), d0), s0); - const __m128i err0 = _mm_madd_epi16(e0, e0); - sum32 = _mm_add_epi32(sum32, err0); - } - for (k = j; k < width; ++k) { - const int32_t u = (int32_t)(dat[k] << SGRPROJ_RST_BITS); - int32_t v = xq[1] * (flt1[k] - u); - const int32_t e = ROUND_POWER_OF_TWO(v, shift) + dat[k] - src[k]; - err += e * e; - } - dat += dat_stride; - src += src_stride; - flt1 += flt1_stride; - const __m128i sum64_0 = _mm_cvtepi32_epi64(sum32); - const __m128i sum64_1 = _mm_cvtepi32_epi64(_mm_srli_si128(sum32, 8)); - sum64 = _mm_add_epi64(sum64, sum64_0); - sum64 = _mm_add_epi64(sum64, sum64_1); - } - } else { - __m128i sum32 = _mm_setzero_si128(); - for (i = 0; i < height; ++i) { - for (j = 0; j < width - 16; j += 16) { - const __m128i d = xx_loadu_128(dat + j); - const __m128i s = xx_loadu_128(src + j); - const __m128i d0 = _mm_cvtepu8_epi16(d); - const __m128i d1 = _mm_cvtepu8_epi16(_mm_srli_si128(d, 8)); - const __m128i s0 = _mm_cvtepu8_epi16(s); - const __m128i s1 = 
_mm_cvtepu8_epi16(_mm_srli_si128(s, 8)); - const __m128i diff0 = _mm_sub_epi16(d0, s0); - const __m128i diff1 = _mm_sub_epi16(d1, s1); - const __m128i err0 = _mm_madd_epi16(diff0, diff0); - const __m128i err1 = _mm_madd_epi16(diff1, diff1); - sum32 = _mm_add_epi32(sum32, err0); - sum32 = _mm_add_epi32(sum32, err1); - } - for (k = j; k < width; ++k) { - const int32_t e = (int32_t)(dat[k]) - src[k]; - err += e * e; - } - dat += dat_stride; - src += src_stride; - } - const __m128i sum64_0 = _mm_cvtepi32_epi64(sum32); - const __m128i sum64_1 = _mm_cvtepi32_epi64(_mm_srli_si128(sum32, 8)); - sum64 = _mm_add_epi64(sum64_0, sum64_1); - } - int64_t sum[2]; - xx_storeu_128(sum, sum64); - err += sum[0] + sum[1]; - return err; -} diff --git a/third_party/aom/av1/encoder/x86/temporal_filter_apply_sse2.asm b/third_party/aom/av1/encoder/x86/temporal_filter_apply_sse2.asm deleted file mode 100644 index 30983d1c1..000000000 --- a/third_party/aom/av1/encoder/x86/temporal_filter_apply_sse2.asm +++ /dev/null @@ -1,217 +0,0 @@ -; -; Copyright (c) 2016, Alliance for Open Media. All rights reserved -; -; This source code is subject to the terms of the BSD 2 Clause License and -; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License -; was not distributed with this source code in the LICENSE file, you can -; obtain it at www.aomedia.org/license/software. If the Alliance for Open -; Media Patent License 1.0 was not distributed with this source code in the -; PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
-; - -; - - -%include "aom_ports/x86_abi_support.asm" - -SECTION .text - -; void av1_temporal_filter_apply_sse2 | arg -; (unsigned char *frame1, | 0 -; unsigned int stride, | 1 -; unsigned char *frame2, | 2 -; unsigned int block_width, | 3 -; unsigned int block_height, | 4 -; int strength, | 5 -; int filter_weight, | 6 -; unsigned int *accumulator, | 7 -; unsigned short *count) | 8 -global sym(av1_temporal_filter_apply_sse2) PRIVATE -sym(av1_temporal_filter_apply_sse2): - - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 9 - SAVE_XMM 7 - GET_GOT rbx - push rsi - push rdi - ALIGN_STACK 16, rax - %define block_width 0 - %define block_height 16 - %define strength 32 - %define filter_weight 48 - %define rounding_bit 64 - %define rbp_backup 80 - %define stack_size 96 - sub rsp, stack_size - mov [rsp + rbp_backup], rbp - ; end prolog - - mov edx, arg(3) - mov [rsp + block_width], rdx - mov edx, arg(4) - mov [rsp + block_height], rdx - movd xmm6, arg(5) - movdqa [rsp + strength], xmm6 ; where strength is used, all 16 bytes are read - - ; calculate the rounding bit outside the loop - ; 0x8000 >> (16 - strength) - mov rdx, 16 - sub rdx, arg(5) ; 16 - strength - movq xmm4, rdx ; can't use rdx w/ shift - movdqa xmm5, [GLOBAL(_const_top_bit)] - psrlw xmm5, xmm4 - movdqa [rsp + rounding_bit], xmm5 - - mov rsi, arg(0) ; src/frame1 - mov rdx, arg(2) ; predictor frame - mov rdi, arg(7) ; accumulator - mov rax, arg(8) ; count - - ; dup the filter weight and store for later - movd xmm0, arg(6) ; filter_weight - pshuflw xmm0, xmm0, 0 - punpcklwd xmm0, xmm0 - movdqa [rsp + filter_weight], xmm0 - - mov rbp, arg(1) ; stride - pxor xmm7, xmm7 ; zero for extraction - - mov rcx, [rsp + block_width] - imul rcx, [rsp + block_height] - add rcx, rdx - cmp dword ptr [rsp + block_width], 8 - jne .temporal_filter_apply_load_16 - -.temporal_filter_apply_load_8: - movq xmm0, [rsi] ; first row - lea rsi, [rsi + rbp] ; += stride - punpcklbw xmm0, xmm7 ; src[ 0- 7] - movq xmm1, [rsi] ; second row - lea 
rsi, [rsi + rbp] ; += stride - punpcklbw xmm1, xmm7 ; src[ 8-15] - jmp .temporal_filter_apply_load_finished - -.temporal_filter_apply_load_16: - movdqa xmm0, [rsi] ; src (frame1) - lea rsi, [rsi + rbp] ; += stride - movdqa xmm1, xmm0 - punpcklbw xmm0, xmm7 ; src[ 0- 7] - punpckhbw xmm1, xmm7 ; src[ 8-15] - -.temporal_filter_apply_load_finished: - movdqa xmm2, [rdx] ; predictor (frame2) - movdqa xmm3, xmm2 - punpcklbw xmm2, xmm7 ; pred[ 0- 7] - punpckhbw xmm3, xmm7 ; pred[ 8-15] - - ; modifier = src_byte - pixel_value - psubw xmm0, xmm2 ; src - pred[ 0- 7] - psubw xmm1, xmm3 ; src - pred[ 8-15] - - ; modifier *= modifier - pmullw xmm0, xmm0 ; modifer[ 0- 7]^2 - pmullw xmm1, xmm1 ; modifer[ 8-15]^2 - - ; modifier *= 3 - pmullw xmm0, [GLOBAL(_const_3w)] - pmullw xmm1, [GLOBAL(_const_3w)] - - ; modifer += 0x8000 >> (16 - strength) - paddw xmm0, [rsp + rounding_bit] - paddw xmm1, [rsp + rounding_bit] - - ; modifier >>= strength - psrlw xmm0, [rsp + strength] - psrlw xmm1, [rsp + strength] - - ; modifier = 16 - modifier - ; saturation takes care of modifier > 16 - movdqa xmm3, [GLOBAL(_const_16w)] - movdqa xmm2, [GLOBAL(_const_16w)] - psubusw xmm3, xmm1 - psubusw xmm2, xmm0 - - ; modifier *= filter_weight - pmullw xmm2, [rsp + filter_weight] - pmullw xmm3, [rsp + filter_weight] - - ; count - movdqa xmm4, [rax] - movdqa xmm5, [rax+16] - ; += modifier - paddw xmm4, xmm2 - paddw xmm5, xmm3 - ; write back - movdqa [rax], xmm4 - movdqa [rax+16], xmm5 - lea rax, [rax + 16*2] ; count += 16*(sizeof(short)) - - ; load and extract the predictor up to shorts - pxor xmm7, xmm7 - movdqa xmm0, [rdx] - lea rdx, [rdx + 16*1] ; pred += 16*(sizeof(char)) - movdqa xmm1, xmm0 - punpcklbw xmm0, xmm7 ; pred[ 0- 7] - punpckhbw xmm1, xmm7 ; pred[ 8-15] - - ; modifier *= pixel_value - pmullw xmm0, xmm2 - pmullw xmm1, xmm3 - - ; expand to double words - movdqa xmm2, xmm0 - punpcklwd xmm0, xmm7 ; [ 0- 3] - punpckhwd xmm2, xmm7 ; [ 4- 7] - movdqa xmm3, xmm1 - punpcklwd xmm1, xmm7 ; [ 8-11] - 
punpckhwd xmm3, xmm7 ; [12-15] - - ; accumulator - movdqa xmm4, [rdi] - movdqa xmm5, [rdi+16] - movdqa xmm6, [rdi+32] - movdqa xmm7, [rdi+48] - ; += modifier - paddd xmm4, xmm0 - paddd xmm5, xmm2 - paddd xmm6, xmm1 - paddd xmm7, xmm3 - ; write back - movdqa [rdi], xmm4 - movdqa [rdi+16], xmm5 - movdqa [rdi+32], xmm6 - movdqa [rdi+48], xmm7 - lea rdi, [rdi + 16*4] ; accumulator += 16*(sizeof(int)) - - cmp rdx, rcx - je .temporal_filter_apply_epilog - pxor xmm7, xmm7 ; zero for extraction - cmp dword ptr [rsp + block_width], 16 - je .temporal_filter_apply_load_16 - jmp .temporal_filter_apply_load_8 - -.temporal_filter_apply_epilog: - ; begin epilog - mov rbp, [rsp + rbp_backup] - add rsp, stack_size - pop rsp - pop rdi - pop rsi - RESTORE_GOT - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -SECTION_RODATA -align 16 -_const_3w: - times 8 dw 3 -align 16 -_const_top_bit: - times 8 dw 1<<15 -align 16 -_const_16w: - times 8 dw 16 diff --git a/third_party/aom/av1/encoder/x86/wedge_utils_avx2.c b/third_party/aom/av1/encoder/x86/wedge_utils_avx2.c deleted file mode 100644 index 2a792f14e..000000000 --- a/third_party/aom/av1/encoder/x86/wedge_utils_avx2.c +++ /dev/null @@ -1,215 +0,0 @@ -/* - * Copyright (c) 2018, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 
- */ - -#include -#include -#include - -#include "aom_dsp/x86/synonyms.h" -#include "aom_dsp/x86/synonyms_avx2.h" -#include "aom/aom_integer.h" - -#include "av1/common/reconinter.h" - -#define MAX_MASK_VALUE (1 << WEDGE_WEIGHT_BITS) - -/** - * See av1_wedge_sse_from_residuals_c - */ -uint64_t av1_wedge_sse_from_residuals_avx2(const int16_t *r1, const int16_t *d, - const uint8_t *m, int N) { - int n = -N; - - uint64_t csse; - - const __m256i v_mask_max_w = _mm256_set1_epi16(MAX_MASK_VALUE); - const __m256i v_zext_q = yy_set1_64_from_32i(0xffffffff); - - __m256i v_acc0_q = _mm256_setzero_si256(); - - assert(N % 64 == 0); - - r1 += N; - d += N; - m += N; - - do { - const __m256i v_r0_w = _mm256_lddqu_si256((__m256i *)(r1 + n)); - const __m256i v_d0_w = _mm256_lddqu_si256((__m256i *)(d + n)); - const __m128i v_m01_b = _mm_lddqu_si128((__m128i *)(m + n)); - - const __m256i v_rd0l_w = _mm256_unpacklo_epi16(v_d0_w, v_r0_w); - const __m256i v_rd0h_w = _mm256_unpackhi_epi16(v_d0_w, v_r0_w); - const __m256i v_m0_w = _mm256_cvtepu8_epi16(v_m01_b); - - const __m256i v_m0l_w = _mm256_unpacklo_epi16(v_m0_w, v_mask_max_w); - const __m256i v_m0h_w = _mm256_unpackhi_epi16(v_m0_w, v_mask_max_w); - - const __m256i v_t0l_d = _mm256_madd_epi16(v_rd0l_w, v_m0l_w); - const __m256i v_t0h_d = _mm256_madd_epi16(v_rd0h_w, v_m0h_w); - - const __m256i v_t0_w = _mm256_packs_epi32(v_t0l_d, v_t0h_d); - - const __m256i v_sq0_d = _mm256_madd_epi16(v_t0_w, v_t0_w); - - const __m256i v_sum0_q = _mm256_add_epi64( - _mm256_and_si256(v_sq0_d, v_zext_q), _mm256_srli_epi64(v_sq0_d, 32)); - - v_acc0_q = _mm256_add_epi64(v_acc0_q, v_sum0_q); - - n += 16; - } while (n); - - v_acc0_q = _mm256_add_epi64(v_acc0_q, _mm256_srli_si256(v_acc0_q, 8)); - __m128i v_acc_q_0 = _mm256_castsi256_si128(v_acc0_q); - __m128i v_acc_q_1 = _mm256_extracti128_si256(v_acc0_q, 1); - v_acc_q_0 = _mm_add_epi64(v_acc_q_0, v_acc_q_1); -#if ARCH_X86_64 - csse = (uint64_t)_mm_extract_epi64(v_acc_q_0, 0); -#else - xx_storel_64(&csse, 
v_acc_q_0); -#endif - - return ROUND_POWER_OF_TWO(csse, 2 * WEDGE_WEIGHT_BITS); -} - -/** - * See av1_wedge_sign_from_residuals_c - */ -int av1_wedge_sign_from_residuals_avx2(const int16_t *ds, const uint8_t *m, - int N, int64_t limit) { - int64_t acc; - __m256i v_acc0_d = _mm256_setzero_si256(); - - // Input size limited to 8192 by the use of 32 bit accumulators and m - // being between [0, 64]. Overflow might happen at larger sizes, - // though it is practically impossible on real video input. - assert(N < 8192); - assert(N % 64 == 0); - - do { - const __m256i v_m01_b = _mm256_lddqu_si256((__m256i *)(m)); - const __m256i v_m23_b = _mm256_lddqu_si256((__m256i *)(m + 32)); - - const __m256i v_d0_w = _mm256_lddqu_si256((__m256i *)(ds)); - const __m256i v_d1_w = _mm256_lddqu_si256((__m256i *)(ds + 16)); - const __m256i v_d2_w = _mm256_lddqu_si256((__m256i *)(ds + 32)); - const __m256i v_d3_w = _mm256_lddqu_si256((__m256i *)(ds + 48)); - - const __m256i v_m0_w = - _mm256_cvtepu8_epi16(_mm256_castsi256_si128(v_m01_b)); - const __m256i v_m1_w = - _mm256_cvtepu8_epi16(_mm256_extracti128_si256(v_m01_b, 1)); - const __m256i v_m2_w = - _mm256_cvtepu8_epi16(_mm256_castsi256_si128(v_m23_b)); - const __m256i v_m3_w = - _mm256_cvtepu8_epi16(_mm256_extracti128_si256(v_m23_b, 1)); - - const __m256i v_p0_d = _mm256_madd_epi16(v_d0_w, v_m0_w); - const __m256i v_p1_d = _mm256_madd_epi16(v_d1_w, v_m1_w); - const __m256i v_p2_d = _mm256_madd_epi16(v_d2_w, v_m2_w); - const __m256i v_p3_d = _mm256_madd_epi16(v_d3_w, v_m3_w); - - const __m256i v_p01_d = _mm256_add_epi32(v_p0_d, v_p1_d); - const __m256i v_p23_d = _mm256_add_epi32(v_p2_d, v_p3_d); - - const __m256i v_p0123_d = _mm256_add_epi32(v_p01_d, v_p23_d); - - v_acc0_d = _mm256_add_epi32(v_acc0_d, v_p0123_d); - - ds += 64; - m += 64; - - N -= 64; - } while (N); - - __m256i v_sign_d = _mm256_srai_epi32(v_acc0_d, 31); - v_acc0_d = _mm256_add_epi64(_mm256_unpacklo_epi32(v_acc0_d, v_sign_d), - _mm256_unpackhi_epi32(v_acc0_d, v_sign_d)); 
- - __m256i v_acc_q = _mm256_add_epi64(v_acc0_d, _mm256_srli_si256(v_acc0_d, 8)); - - __m128i v_acc_q_0 = _mm256_castsi256_si128(v_acc_q); - __m128i v_acc_q_1 = _mm256_extracti128_si256(v_acc_q, 1); - v_acc_q_0 = _mm_add_epi64(v_acc_q_0, v_acc_q_1); - -#if ARCH_X86_64 - acc = (uint64_t)_mm_extract_epi64(v_acc_q_0, 0); -#else - xx_storel_64(&acc, v_acc_q_0); -#endif - - return acc > limit; -} - -/** - * av1_wedge_compute_delta_squares_c - */ -void av1_wedge_compute_delta_squares_avx2(int16_t *d, const int16_t *a, - const int16_t *b, int N) { - const __m256i v_neg_w = _mm256_set1_epi32(0xffff0001); - - assert(N % 64 == 0); - - do { - const __m256i v_a0_w = _mm256_lddqu_si256((__m256i *)(a)); - const __m256i v_b0_w = _mm256_lddqu_si256((__m256i *)(b)); - const __m256i v_a1_w = _mm256_lddqu_si256((__m256i *)(a + 16)); - const __m256i v_b1_w = _mm256_lddqu_si256((__m256i *)(b + 16)); - const __m256i v_a2_w = _mm256_lddqu_si256((__m256i *)(a + 32)); - const __m256i v_b2_w = _mm256_lddqu_si256((__m256i *)(b + 32)); - const __m256i v_a3_w = _mm256_lddqu_si256((__m256i *)(a + 48)); - const __m256i v_b3_w = _mm256_lddqu_si256((__m256i *)(b + 48)); - - const __m256i v_ab0l_w = _mm256_unpacklo_epi16(v_a0_w, v_b0_w); - const __m256i v_ab0h_w = _mm256_unpackhi_epi16(v_a0_w, v_b0_w); - const __m256i v_ab1l_w = _mm256_unpacklo_epi16(v_a1_w, v_b1_w); - const __m256i v_ab1h_w = _mm256_unpackhi_epi16(v_a1_w, v_b1_w); - const __m256i v_ab2l_w = _mm256_unpacklo_epi16(v_a2_w, v_b2_w); - const __m256i v_ab2h_w = _mm256_unpackhi_epi16(v_a2_w, v_b2_w); - const __m256i v_ab3l_w = _mm256_unpacklo_epi16(v_a3_w, v_b3_w); - const __m256i v_ab3h_w = _mm256_unpackhi_epi16(v_a3_w, v_b3_w); - - // Negate top word of pairs - const __m256i v_abl0n_w = _mm256_sign_epi16(v_ab0l_w, v_neg_w); - const __m256i v_abh0n_w = _mm256_sign_epi16(v_ab0h_w, v_neg_w); - const __m256i v_abl1n_w = _mm256_sign_epi16(v_ab1l_w, v_neg_w); - const __m256i v_abh1n_w = _mm256_sign_epi16(v_ab1h_w, v_neg_w); - const __m256i 
v_abl2n_w = _mm256_sign_epi16(v_ab2l_w, v_neg_w); - const __m256i v_abh2n_w = _mm256_sign_epi16(v_ab2h_w, v_neg_w); - const __m256i v_abl3n_w = _mm256_sign_epi16(v_ab3l_w, v_neg_w); - const __m256i v_abh3n_w = _mm256_sign_epi16(v_ab3h_w, v_neg_w); - - const __m256i v_r0l_w = _mm256_madd_epi16(v_ab0l_w, v_abl0n_w); - const __m256i v_r0h_w = _mm256_madd_epi16(v_ab0h_w, v_abh0n_w); - const __m256i v_r1l_w = _mm256_madd_epi16(v_ab1l_w, v_abl1n_w); - const __m256i v_r1h_w = _mm256_madd_epi16(v_ab1h_w, v_abh1n_w); - const __m256i v_r2l_w = _mm256_madd_epi16(v_ab2l_w, v_abl2n_w); - const __m256i v_r2h_w = _mm256_madd_epi16(v_ab2h_w, v_abh2n_w); - const __m256i v_r3l_w = _mm256_madd_epi16(v_ab3l_w, v_abl3n_w); - const __m256i v_r3h_w = _mm256_madd_epi16(v_ab3h_w, v_abh3n_w); - - const __m256i v_r0_w = _mm256_packs_epi32(v_r0l_w, v_r0h_w); - const __m256i v_r1_w = _mm256_packs_epi32(v_r1l_w, v_r1h_w); - const __m256i v_r2_w = _mm256_packs_epi32(v_r2l_w, v_r2h_w); - const __m256i v_r3_w = _mm256_packs_epi32(v_r3l_w, v_r3h_w); - - _mm256_store_si256((__m256i *)(d), v_r0_w); - _mm256_store_si256((__m256i *)(d + 16), v_r1_w); - _mm256_store_si256((__m256i *)(d + 32), v_r2_w); - _mm256_store_si256((__m256i *)(d + 48), v_r3_w); - - a += 64; - b += 64; - d += 64; - N -= 64; - } while (N); -} diff --git a/third_party/aom/av1/encoder/x86/wedge_utils_sse2.c b/third_party/aom/av1/encoder/x86/wedge_utils_sse2.c deleted file mode 100644 index 4d2e99f25..000000000 --- a/third_party/aom/av1/encoder/x86/wedge_utils_sse2.c +++ /dev/null @@ -1,254 +0,0 @@ -/* - * Copyright (c) 2016, Alliance for Open Media. All rights reserved - * - * This source code is subject to the terms of the BSD 2 Clause License and - * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License - * was not distributed with this source code in the LICENSE file, you can - * obtain it at www.aomedia.org/license/software. 
If the Alliance for Open - * Media Patent License 1.0 was not distributed with this source code in the - * PATENTS file, you can obtain it at www.aomedia.org/license/patent. - */ - -#include -#include - -#include "aom_dsp/x86/synonyms.h" - -#include "aom/aom_integer.h" - -#include "av1/common/reconinter.h" - -#define MAX_MASK_VALUE (1 << WEDGE_WEIGHT_BITS) - -/** - * See av1_wedge_sse_from_residuals_c - */ -uint64_t av1_wedge_sse_from_residuals_sse2(const int16_t *r1, const int16_t *d, - const uint8_t *m, int N) { - int n = -N; - int n8 = n + 8; - - uint64_t csse; - - const __m128i v_mask_max_w = _mm_set1_epi16(MAX_MASK_VALUE); - const __m128i v_zext_q = xx_set1_64_from_32i(0xffffffff); - - __m128i v_acc0_q = _mm_setzero_si128(); - - assert(N % 64 == 0); - - r1 += N; - d += N; - m += N; - - do { - const __m128i v_r0_w = xx_load_128(r1 + n); - const __m128i v_r1_w = xx_load_128(r1 + n8); - const __m128i v_d0_w = xx_load_128(d + n); - const __m128i v_d1_w = xx_load_128(d + n8); - const __m128i v_m01_b = xx_load_128(m + n); - - const __m128i v_rd0l_w = _mm_unpacklo_epi16(v_d0_w, v_r0_w); - const __m128i v_rd0h_w = _mm_unpackhi_epi16(v_d0_w, v_r0_w); - const __m128i v_rd1l_w = _mm_unpacklo_epi16(v_d1_w, v_r1_w); - const __m128i v_rd1h_w = _mm_unpackhi_epi16(v_d1_w, v_r1_w); - const __m128i v_m0_w = _mm_unpacklo_epi8(v_m01_b, _mm_setzero_si128()); - const __m128i v_m1_w = _mm_unpackhi_epi8(v_m01_b, _mm_setzero_si128()); - - const __m128i v_m0l_w = _mm_unpacklo_epi16(v_m0_w, v_mask_max_w); - const __m128i v_m0h_w = _mm_unpackhi_epi16(v_m0_w, v_mask_max_w); - const __m128i v_m1l_w = _mm_unpacklo_epi16(v_m1_w, v_mask_max_w); - const __m128i v_m1h_w = _mm_unpackhi_epi16(v_m1_w, v_mask_max_w); - - const __m128i v_t0l_d = _mm_madd_epi16(v_rd0l_w, v_m0l_w); - const __m128i v_t0h_d = _mm_madd_epi16(v_rd0h_w, v_m0h_w); - const __m128i v_t1l_d = _mm_madd_epi16(v_rd1l_w, v_m1l_w); - const __m128i v_t1h_d = _mm_madd_epi16(v_rd1h_w, v_m1h_w); - - const __m128i v_t0_w = 
_mm_packs_epi32(v_t0l_d, v_t0h_d); - const __m128i v_t1_w = _mm_packs_epi32(v_t1l_d, v_t1h_d); - - const __m128i v_sq0_d = _mm_madd_epi16(v_t0_w, v_t0_w); - const __m128i v_sq1_d = _mm_madd_epi16(v_t1_w, v_t1_w); - - const __m128i v_sum0_q = _mm_add_epi64(_mm_and_si128(v_sq0_d, v_zext_q), - _mm_srli_epi64(v_sq0_d, 32)); - const __m128i v_sum1_q = _mm_add_epi64(_mm_and_si128(v_sq1_d, v_zext_q), - _mm_srli_epi64(v_sq1_d, 32)); - - v_acc0_q = _mm_add_epi64(v_acc0_q, v_sum0_q); - v_acc0_q = _mm_add_epi64(v_acc0_q, v_sum1_q); - - n8 += 16; - n += 16; - } while (n); - - v_acc0_q = _mm_add_epi64(v_acc0_q, _mm_srli_si128(v_acc0_q, 8)); - -#if ARCH_X86_64 - csse = (uint64_t)_mm_cvtsi128_si64(v_acc0_q); -#else - xx_storel_64(&csse, v_acc0_q); -#endif - - return ROUND_POWER_OF_TWO(csse, 2 * WEDGE_WEIGHT_BITS); -} - -/** - * See av1_wedge_sign_from_residuals_c - */ -int av1_wedge_sign_from_residuals_sse2(const int16_t *ds, const uint8_t *m, - int N, int64_t limit) { - int64_t acc; - - __m128i v_sign_d; - __m128i v_acc0_d = _mm_setzero_si128(); - __m128i v_acc1_d = _mm_setzero_si128(); - __m128i v_acc_q; - - // Input size limited to 8192 by the use of 32 bit accumulators and m - // being between [0, 64]. Overflow might happen at larger sizes, - // though it is practically impossible on real video input. 
- assert(N < 8192); - assert(N % 64 == 0); - - do { - const __m128i v_m01_b = xx_load_128(m); - const __m128i v_m23_b = xx_load_128(m + 16); - const __m128i v_m45_b = xx_load_128(m + 32); - const __m128i v_m67_b = xx_load_128(m + 48); - - const __m128i v_d0_w = xx_load_128(ds); - const __m128i v_d1_w = xx_load_128(ds + 8); - const __m128i v_d2_w = xx_load_128(ds + 16); - const __m128i v_d3_w = xx_load_128(ds + 24); - const __m128i v_d4_w = xx_load_128(ds + 32); - const __m128i v_d5_w = xx_load_128(ds + 40); - const __m128i v_d6_w = xx_load_128(ds + 48); - const __m128i v_d7_w = xx_load_128(ds + 56); - - const __m128i v_m0_w = _mm_unpacklo_epi8(v_m01_b, _mm_setzero_si128()); - const __m128i v_m1_w = _mm_unpackhi_epi8(v_m01_b, _mm_setzero_si128()); - const __m128i v_m2_w = _mm_unpacklo_epi8(v_m23_b, _mm_setzero_si128()); - const __m128i v_m3_w = _mm_unpackhi_epi8(v_m23_b, _mm_setzero_si128()); - const __m128i v_m4_w = _mm_unpacklo_epi8(v_m45_b, _mm_setzero_si128()); - const __m128i v_m5_w = _mm_unpackhi_epi8(v_m45_b, _mm_setzero_si128()); - const __m128i v_m6_w = _mm_unpacklo_epi8(v_m67_b, _mm_setzero_si128()); - const __m128i v_m7_w = _mm_unpackhi_epi8(v_m67_b, _mm_setzero_si128()); - - const __m128i v_p0_d = _mm_madd_epi16(v_d0_w, v_m0_w); - const __m128i v_p1_d = _mm_madd_epi16(v_d1_w, v_m1_w); - const __m128i v_p2_d = _mm_madd_epi16(v_d2_w, v_m2_w); - const __m128i v_p3_d = _mm_madd_epi16(v_d3_w, v_m3_w); - const __m128i v_p4_d = _mm_madd_epi16(v_d4_w, v_m4_w); - const __m128i v_p5_d = _mm_madd_epi16(v_d5_w, v_m5_w); - const __m128i v_p6_d = _mm_madd_epi16(v_d6_w, v_m6_w); - const __m128i v_p7_d = _mm_madd_epi16(v_d7_w, v_m7_w); - - const __m128i v_p01_d = _mm_add_epi32(v_p0_d, v_p1_d); - const __m128i v_p23_d = _mm_add_epi32(v_p2_d, v_p3_d); - const __m128i v_p45_d = _mm_add_epi32(v_p4_d, v_p5_d); - const __m128i v_p67_d = _mm_add_epi32(v_p6_d, v_p7_d); - - const __m128i v_p0123_d = _mm_add_epi32(v_p01_d, v_p23_d); - const __m128i v_p4567_d = 
_mm_add_epi32(v_p45_d, v_p67_d); - - v_acc0_d = _mm_add_epi32(v_acc0_d, v_p0123_d); - v_acc1_d = _mm_add_epi32(v_acc1_d, v_p4567_d); - - ds += 64; - m += 64; - - N -= 64; - } while (N); - - v_sign_d = _mm_cmplt_epi32(v_acc0_d, _mm_setzero_si128()); - v_acc0_d = _mm_add_epi64(_mm_unpacklo_epi32(v_acc0_d, v_sign_d), - _mm_unpackhi_epi32(v_acc0_d, v_sign_d)); - - v_sign_d = _mm_cmplt_epi32(v_acc1_d, _mm_setzero_si128()); - v_acc1_d = _mm_add_epi64(_mm_unpacklo_epi32(v_acc1_d, v_sign_d), - _mm_unpackhi_epi32(v_acc1_d, v_sign_d)); - - v_acc_q = _mm_add_epi64(v_acc0_d, v_acc1_d); - - v_acc_q = _mm_add_epi64(v_acc_q, _mm_srli_si128(v_acc_q, 8)); - -#if ARCH_X86_64 - acc = (uint64_t)_mm_cvtsi128_si64(v_acc_q); -#else - xx_storel_64(&acc, v_acc_q); -#endif - - return acc > limit; -} - -// Negate under mask -static INLINE __m128i negm_epi16(__m128i v_v_w, __m128i v_mask_w) { - return _mm_sub_epi16(_mm_xor_si128(v_v_w, v_mask_w), v_mask_w); -} - -/** - * av1_wedge_compute_delta_squares_c - */ -void av1_wedge_compute_delta_squares_sse2(int16_t *d, const int16_t *a, - const int16_t *b, int N) { - const __m128i v_neg_w = - _mm_set_epi16(0xffff, 0, 0xffff, 0, 0xffff, 0, 0xffff, 0); - - assert(N % 64 == 0); - - do { - const __m128i v_a0_w = xx_load_128(a); - const __m128i v_b0_w = xx_load_128(b); - const __m128i v_a1_w = xx_load_128(a + 8); - const __m128i v_b1_w = xx_load_128(b + 8); - const __m128i v_a2_w = xx_load_128(a + 16); - const __m128i v_b2_w = xx_load_128(b + 16); - const __m128i v_a3_w = xx_load_128(a + 24); - const __m128i v_b3_w = xx_load_128(b + 24); - - const __m128i v_ab0l_w = _mm_unpacklo_epi16(v_a0_w, v_b0_w); - const __m128i v_ab0h_w = _mm_unpackhi_epi16(v_a0_w, v_b0_w); - const __m128i v_ab1l_w = _mm_unpacklo_epi16(v_a1_w, v_b1_w); - const __m128i v_ab1h_w = _mm_unpackhi_epi16(v_a1_w, v_b1_w); - const __m128i v_ab2l_w = _mm_unpacklo_epi16(v_a2_w, v_b2_w); - const __m128i v_ab2h_w = _mm_unpackhi_epi16(v_a2_w, v_b2_w); - const __m128i v_ab3l_w = 
_mm_unpacklo_epi16(v_a3_w, v_b3_w); - const __m128i v_ab3h_w = _mm_unpackhi_epi16(v_a3_w, v_b3_w); - - // Negate top word of pairs - const __m128i v_abl0n_w = negm_epi16(v_ab0l_w, v_neg_w); - const __m128i v_abh0n_w = negm_epi16(v_ab0h_w, v_neg_w); - const __m128i v_abl1n_w = negm_epi16(v_ab1l_w, v_neg_w); - const __m128i v_abh1n_w = negm_epi16(v_ab1h_w, v_neg_w); - const __m128i v_abl2n_w = negm_epi16(v_ab2l_w, v_neg_w); - const __m128i v_abh2n_w = negm_epi16(v_ab2h_w, v_neg_w); - const __m128i v_abl3n_w = negm_epi16(v_ab3l_w, v_neg_w); - const __m128i v_abh3n_w = negm_epi16(v_ab3h_w, v_neg_w); - - const __m128i v_r0l_w = _mm_madd_epi16(v_ab0l_w, v_abl0n_w); - const __m128i v_r0h_w = _mm_madd_epi16(v_ab0h_w, v_abh0n_w); - const __m128i v_r1l_w = _mm_madd_epi16(v_ab1l_w, v_abl1n_w); - const __m128i v_r1h_w = _mm_madd_epi16(v_ab1h_w, v_abh1n_w); - const __m128i v_r2l_w = _mm_madd_epi16(v_ab2l_w, v_abl2n_w); - const __m128i v_r2h_w = _mm_madd_epi16(v_ab2h_w, v_abh2n_w); - const __m128i v_r3l_w = _mm_madd_epi16(v_ab3l_w, v_abl3n_w); - const __m128i v_r3h_w = _mm_madd_epi16(v_ab3h_w, v_abh3n_w); - - const __m128i v_r0_w = _mm_packs_epi32(v_r0l_w, v_r0h_w); - const __m128i v_r1_w = _mm_packs_epi32(v_r1l_w, v_r1h_w); - const __m128i v_r2_w = _mm_packs_epi32(v_r2l_w, v_r2h_w); - const __m128i v_r3_w = _mm_packs_epi32(v_r3l_w, v_r3h_w); - - xx_store_128(d, v_r0_w); - xx_store_128(d + 8, v_r1_w); - xx_store_128(d + 16, v_r2_w); - xx_store_128(d + 24, v_r3_w); - - a += 32; - b += 32; - d += 32; - N -= 32; - } while (N); -} -- cgit v1.2.3